#Load packages
library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(sjPlot)
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
library(ggplot2)
library(haven)
#Load BRFSS
BRFSS19<-read_xpt("C:/Users/adolp/Desktop/Statistics 2/BRFSS/LLCP2019.XPT")
#Fix BRFSS names
nams<-names(BRFSS19)
head(BRFSS19, n=10)
## # A tibble: 10 x 342
## `_STATE` FMONTH IDATE IMONTH IDAY IYEAR DISPCODE SEQNO `_PSU` CTELENM1
## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 1 1 0118~ 01 18 2019 1100 2019~ 2.02e9 1
## 2 1 1 0113~ 01 13 2019 1100 2019~ 2.02e9 1
## 3 1 1 0118~ 01 18 2019 1100 2019~ 2.02e9 1
## 4 1 1 0118~ 01 18 2019 1200 2019~ 2.02e9 1
## 5 1 1 0104~ 01 04 2019 1100 2019~ 2.02e9 1
## 6 1 1 0118~ 01 18 2019 1200 2019~ 2.02e9 1
## 7 1 1 0104~ 01 04 2019 1100 2019~ 2.02e9 1
## 8 1 1 0123~ 01 23 2019 1100 2019~ 2.02e9 1
## 9 1 1 0124~ 01 24 2019 1100 2019~ 2.02e9 1
## 10 1 1 0113~ 01 13 2019 1100 2019~ 2.02e9 1
## # ... with 332 more variables: PVTRESD1 <dbl>, COLGHOUS <dbl>, STATERE1 <dbl>,
## # CELPHONE <dbl>, LADULT1 <dbl>, COLGSEX <dbl>, NUMADULT <dbl>,
## # LANDSEX <dbl>, NUMMEN <dbl>, NUMWOMEN <dbl>, RESPSLCT <dbl>,
## # SAFETIME <dbl>, CTELNUM1 <dbl>, CELLFON5 <dbl>, CADULT1 <dbl>,
## # CELLSEX <dbl>, PVTRESD3 <dbl>, CCLGHOUS <dbl>, CSTATE1 <dbl>,
## # LANDLINE <dbl>, HHADULT <dbl>, SEXVAR <dbl>, GENHLTH <dbl>, PHYSHLTH <dbl>,
## # MENTHLTH <dbl>, POORHLTH <dbl>, HLTHPLN1 <dbl>, PERSDOC2 <dbl>,
## # MEDCOST <dbl>, CHECKUP1 <dbl>, BPHIGH4 <dbl>, BPMEDS <dbl>, CHOLCHK2 <dbl>,
## # TOLDHI2 <dbl>, CHOLMED2 <dbl>, CVDINFR4 <dbl>, CVDCRHD4 <dbl>,
## # CVDSTRK3 <dbl>, ASTHMA3 <dbl>, ASTHNOW <dbl>, CHCSCNCR <dbl>,
## # CHCOCNCR <dbl>, CHCCOPD2 <dbl>, ADDEPEV3 <dbl>, CHCKDNY2 <dbl>,
## # DIABETE4 <dbl>, DIABAGE3 <dbl>, HAVARTH4 <dbl>, ARTHEXER <dbl>,
## # ARTHEDU <dbl>, LMTJOIN3 <dbl>, ARTHDIS2 <dbl>, JOINPAI2 <dbl>,
## # MARITAL <dbl>, EDUCA <dbl>, RENTHOM1 <dbl>, NUMHHOL3 <dbl>, NUMPHON3 <dbl>,
## # CPDEMO1B <dbl>, VETERAN3 <dbl>, EMPLOY1 <dbl>, CHILDREN <dbl>,
## # INCOME2 <dbl>, WEIGHT2 <dbl>, HEIGHT3 <dbl>, PREGNANT <dbl>, DEAF <dbl>,
## # BLIND <dbl>, DECIDE <dbl>, DIFFWALK <dbl>, DIFFDRES <dbl>, DIFFALON <dbl>,
## # SMOKE100 <dbl>, SMOKDAY2 <dbl>, STOPSMK2 <dbl>, LASTSMK2 <dbl>,
## # USENOW3 <dbl>, ALCDAY5 <dbl>, AVEDRNK3 <dbl>, DRNK3GE5 <dbl>,
## # MAXDRNKS <dbl>, EXERANY2 <dbl>, EXRACT11 <dbl>, EXEROFT1 <dbl>,
## # EXERHMM1 <dbl>, EXRACT21 <dbl>, EXEROFT2 <dbl>, EXERHMM2 <dbl>,
## # STRENGTH <dbl>, FRUIT2 <dbl>, FRUITJU2 <dbl>, FVGREEN1 <dbl>,
## # FRENCHF1 <dbl>, POTATOE1 <dbl>, VEGETAB2 <dbl>, FLUSHOT7 <dbl>,
## # FLSHTMY3 <dbl>, TETANUS1 <dbl>, PNEUVAC4 <dbl>, HIVTST7 <dbl>, ...
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(BRFSS19)<-newnames
#Outcome variable
BRFSS19$Drank<-Recode(BRFSS19$drnk3ge5, recodes = "88=0; 77=NA; 99=NA")
hist(BRFSS19$Drank)
summary(BRFSS19$Drank)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 1.25 1.00 76.00 223396
Analyses
#Recode variables
BRFSS19$Black<-Recode(BRFSS19$racegr3, recodes="2=1; 9=NA; else=0")
BRFSS19$White<-Recode(BRFSS19$racegr3, recodes="1=1; 9=NA; else=0")
BRFSS19$Other<-Recode(BRFSS19$racegr3, recodes="3:4=1; 9=NA; else=0")
BRFSS19$Hispanic<-Recode(BRFSS19$racegr3, recodes="5=1; 9=NA; else=0")
BRFSS19$Race_eth<-Recode(BRFSS19$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)
BRFSS19$Alc<-Recode(BRFSS19$acedrink, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Dep<-Recode(BRFSS19$acedeprs, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Gen<-Recode(BRFSS19$birthsex, recodes = "7:9=NA; 1=1;2=0")
#Subset data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
subset<-BRFSS19%>%
select(Drank,Alc,Dep,Gen,Race_eth,Black, White, Other, Hispanic,llcpwt,ststr) %>%
filter( complete.cases(.))
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr,
weights=~llcpwt,
data = BRFSS19[is.na(BRFSS19$llcpwt)==F,])
Poisson Regression
#Descriptive Statistics
svyhist(~Drank, des)
svyby(~Drank, ~Race_eth+Alc, des, svymean, na.rm=T)
## Race_eth Alc Drank se
## hispanic.0 hispanic 0 1.125233 0.10477654
## nh black.0 nh black 0 1.208418 0.10925663
## nh multirace.0 nh multirace 0 2.360275 0.37950153
## nh other.0 nh other 0 1.082813 0.15921920
## nhwhite.0 nhwhite 0 1.315679 0.03702244
## hispanic.1 hispanic 1 2.401547 0.32206941
## nh black.1 nh black 1 2.468785 0.44361630
## nh multirace.1 nh multirace 1 3.322718 0.76812281
## nh other.1 nh other 1 3.616181 0.71655462
## nhwhite.1 nhwhite 1 1.970008 0.08264054
svyby(~Drank, ~Dep, des, svymean, na.rm=T)
## Dep Drank se
## 0 0 1.357362 0.03290095
## 1 1 2.004254 0.09652755
#Poisson GLM Fit Survey Data
fit1<-svyglm(Drank~factor(Race_eth)+factor(Dep)+factor(Alc), design=des, family=poisson)
summary(fit1)
##
## Call:
## svyglm(formula = Drank ~ factor(Race_eth) + factor(Dep) + factor(Alc),
## design = des, family = poisson)
##
## Survey design:
## svydesign(ids = ~1, strata = ~ststr, weights = ~llcpwt, data = BRFSS19[is.na(BRFSS19$llcpwt) ==
## F, ])
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.17007 0.07959 2.137 0.032623 *
## factor(Race_eth)nh black 0.07481 0.11981 0.624 0.532379
## factor(Race_eth)nh multirace 0.57740 0.15555 3.712 0.000206 ***
## factor(Race_eth)nh other 0.17080 0.14818 1.153 0.249075
## factor(Race_eth)nhwhite 0.03554 0.08226 0.432 0.665652
## factor(Dep)1 0.23917 0.05373 4.451 8.56e-06 ***
## factor(Alc)1 0.41912 0.04788 8.754 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 12.45095)
##
## Number of Fisher Scoring iterations: 7
#exp(estimates) for percentages
#Poisson Mode Risk Ratios
round(exp(summary(fit1)$coef[-1,1]), 3)
## factor(Race_eth)nh black factor(Race_eth)nh multirace
## 1.078 1.781
## factor(Race_eth)nh other factor(Race_eth)nhwhite
## 1.186 1.036
## factor(Dep)1 factor(Alc)1
## 1.270 1.521
mean(subset$Drank)
## [1] 1.198342
var(subset$Drank)
## [1] 13.34087
Overdispersion
#Poisson GLM Fit Non Survey Data
fit2<-glm(Drank~factor(Race_eth)+factor(Dep)+factor(Alc), data=BRFSS19, family=poisson)
summary(fit2)
##
## Call:
## glm(formula = Drank ~ factor(Race_eth) + factor(Dep) + factor(Alc),
## family = poisson, data = BRFSS19)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5988 -1.5871 -1.4358 -0.5833 22.4438
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.230685 0.016547 13.941 < 2e-16 ***
## factor(Race_eth)nh black -0.139972 0.021434 -6.530 6.56e-11 ***
## factor(Race_eth)nh multirace 0.301032 0.029421 10.232 < 2e-16 ***
## factor(Race_eth)nh other 0.126901 0.027354 4.639 3.50e-06 ***
## factor(Race_eth)nhwhite -0.200350 0.016770 -11.947 < 2e-16 ***
## factor(Dep)1 0.287727 0.009760 29.480 < 2e-16 ***
## factor(Alc)1 0.397535 0.009003 44.155 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 247048 on 49431 degrees of freedom
## Residual deviance: 242397 on 49425 degrees of freedom
## (368836 observations deleted due to missingness)
## AIC: 280257
##
## Number of Fisher Scoring iterations: 7
#ChiSquare test
scale<-sqrt(fit2$deviance/fit2$df.residual)
scale
## [1] 2.214577
1-pchisq(fit2$deviance, df = fit2$df.residual)
## [1] 0
Quasi distribution
#QuasiPoisson
fit3<-glm(Drank~factor(Alc)+factor(Dep)+factor(Race_eth), data=BRFSS19, family=quasipoisson)
summary(fit3)
##
## Call:
## glm(formula = Drank ~ factor(Alc) + factor(Dep) + factor(Race_eth),
## family = quasipoisson, data = BRFSS19)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5988 -1.5871 -1.4358 -0.5833 22.4438
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.23069 0.05991 3.851 0.000118 ***
## factor(Alc)1 0.39754 0.03259 12.196 < 2e-16 ***
## factor(Dep)1 0.28773 0.03533 8.143 3.94e-16 ***
## factor(Race_eth)nh black -0.13997 0.07760 -1.804 0.071269 .
## factor(Race_eth)nh multirace 0.30103 0.10651 2.826 0.004712 **
## factor(Race_eth)nh other 0.12690 0.09903 1.281 0.200038
## factor(Race_eth)nhwhite -0.20035 0.06071 -3.300 0.000968 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 13.10665)
##
## Null deviance: 247048 on 49431 degrees of freedom
## Residual deviance: 242397 on 49425 degrees of freedom
## (368836 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 7
Negative binomial models
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
coeftest(fit2, vcov=vcovHC(fit2, type="HC1",cluster="ststr"))
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.230685 0.057885 3.9852 6.742e-05 ***
## factor(Race_eth)nh black -0.139972 0.076469 -1.8304 0.0671857 .
## factor(Race_eth)nh multirace 0.301032 0.109523 2.7486 0.0059855 **
## factor(Race_eth)nh other 0.126901 0.104766 1.2113 0.2257882
## factor(Race_eth)nhwhite -0.200350 0.059116 -3.3891 0.0007012 ***
## factor(Dep)1 0.287727 0.036446 7.8947 2.911e-15 ***
## factor(Alc)1 0.397535 0.033765 11.7735 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Negative binomial fit
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
fit.nb1<-glm.nb(Drank~factor(Race_eth),
data=subset,
weights=llcpwt/mean(llcpwt, na.rm=T))
fit.nb2<-glm.nb(Drank~factor(Race_eth)+factor(Dep)+factor(Alc),
data=subset,
weights=llcpwt/mean(llcpwt, na.rm=T))
tests1<-coeftest(fit.nb1, vcov=vcovHC(fit.nb2, type="HC1",cluster="llcpwt"))
tests<-coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type="HC1",cluster="llcpwt"))
library(stargazer)
stargazer(fit.nb1, fit.nb2,style="demography", type = "text", t.auto=F,p.auto=F,coef=list(tests1[, 1],tests[,1]), se =list(tests1[, 2], tests[, 2]), p=list(tests1[,4],tests[, 4]) )
##
## --------------------------------------------------------------
## Drank
## Model 1 Model 2
## --------------------------------------------------------------
## factor(Race_eth)nh black -0.510 -0.353
## (0.330) (0.330)
## factor(Race_eth)nh multirace 0.076 0.238
## (0.516) (0.516)
## factor(Race_eth)nh other -1.015* -0.588
## (0.484) (0.484)
## factor(Race_eth)nhwhite -0.413 -0.178
## (0.289) (0.289)
## factor(Dep)1 0.418**
## (0.147)
## factor(Alc)1 0.386**
## (0.142)
## Constant 0.712** 0.264
## (0.275) (0.275)
## N 3,015 3,015
## Log Likelihood -3,964.059 -3,944.901
## theta 0.166*** (0.007) 0.172*** (0.008)
## AIC 7,938.118 7,903.803
## --------------------------------------------------------------
## *p < .05; **p < .01; ***p < .001
Count Outcome: Binge Drinking; Considering all types of alcoholic beverages, how many times during the past 30 days did you have 5 or more drinks for men or 4 or more drinks for women on an occasion? Research Question: Does growing up with a problem drinker or alcoholic or growing up with anyone who was depressed, mentally ill, or suicidal, and race/ethnicity predict the number of times respondents binged drink during the past 30 days. It is a count model, so, no, an offset term necessary because each respondent has the same time reference (30 days) when asked to recall to answer the question. Dispersion: In evaluating the level of dispersion in the outcome, the mean and variance of the Poisson regression model are not equal with variance being higher than the mean: over dispersion. Based on the Chi-square fit test, the Poisson model is not a good choice. Rather, Negative Binomial GLM is a better suited model. In comparing the two Negative Binomial GLMs, the two models have similar SEs but differ in betas. Also, the second model has smaller AIC, indicating a better fitted model. Results: Respondents who grew up with a problem drinker or alcoholic had 37% more binge drinking days than respondents who did not grow up with a problem drinker or alcoholic. Respondents who grew up with anyone who was depressed, mentally ill, or suicidal had 42% more binge drinking days than respondents who did not grow up with anyone who was depressed, mentally ill, or suicidal.