#All of the variables in this dataset are from brfss 2017. The relationship between gender, education level, internet usage, income level and mental health days will be analyzed.
#The count outcome variable is number of days suffering from a mental health in the last 30 days.
#Research Question 1. Is higher level of income associated with less number of days suffering from mental health? 2. Are individuals who donโt use the internet more likely to have less number of days suffering from mental health? 3. Are individuals with higher education more likely to have less mental health days?
#A offset term is not necessary because all respondents were asked in the last 30 days. There was no variation.
library(haven)
library(readr)
library (car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
setwd('C:/Users/tim1203/Desktop/utsa/C. Sparks/Dem 7283/Data sets')
load("brfss_2017.rdata")
library(questionr)
## Warning: package 'questionr' was built under R version 3.6.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
names(brfss_17)
## [1] "dispcode" "statere1" "safetime" "hhadult" "genhlth" "physhlth"
## [7] "menthlth" "poorhlth" "hlthpln1" "persdoc2" "medcost" "checkup1"
## [13] "bphigh4" "bpmeds" "cholchk1" "toldhi2" "cholmed1" "cvdinfr4"
## [19] "cvdcrhd4" "cvdstrk3" "asthma3" "asthnow" "chcscncr" "chcocncr"
## [25] "chccopd1" "havarth3" "addepev2" "chckidny" "diabete3" "diabage2"
## [31] "lmtjoin3" "arthdis2" "arthsocl" "joinpai1" "sex" "marital"
## [37] "educa" "renthom1" "numhhol2" "numphon2" "cpdemo1a" "veteran3"
## [43] "employ1" "children" "income2" "internet" "weight2" "height3"
## [49] "pregnant" "deaf" "blind" "decide" "diffwalk" "diffdres"
## [55] "diffalon" "smoke100" "smokday2" "stopsmk2" "lastsmk2" "usenow3"
## [61] "ecigaret" "ecignow" "alcday5" "avedrnk2" "drnk3ge5" "maxdrnks"
## [67] "fruit2" "fruitju2" "fvgreen1" "frenchf1" "potatoe1" "vegetab2"
## [73] "exerany2" "exract11" "exeroft1" "exerhmm1" "exract21" "exeroft2"
## [79] "exerhmm2" "strength" "seatbelt" "flushot6" "flshtmy2" "pneuvac3"
## [85] "shingle2" "hivtst6" "hivtstd3" "hivrisk5" "casthdx2" "casthno2"
## [91] "callbckz" "wdusenow" "wdinftrk" "wdhowoft" "wdshare" "namtribe"
## [97] "namothr" "urbnrrl" "ststr" "impsex" "rfhlth" "phys14d"
## [103] "ment14d" "hcvu651" "rfhype5" "cholch1" "rfchol1" "michd"
## [109] "ltasth1" "casthm1" "asthms1" "drdxar1" "lmtact1" "lmtwrk1"
## [115] "lmtscl1" "prace1" "mrace1" "hispanc" "race" "raceg21"
## [121] "racegr3" "ageg5yr" "age65yr" "age80" "ageg" "wtkg3"
## [127] "bmi5" "bmi5cat" "rfbmi5" "educag" "incomg" "smoker3"
## [133] "rfsmok3" "ecigsts" "curecig" "drnkany5" "rfbing5" "drnkwek"
## [139] "rfdrhv5" "ftjuda2" "frutda2" "grenda1" "frnchda" "potada1"
## [145] "vegeda2" "misfrt1" "misveg1" "frtres1" "vegres1" "frutsu1"
## [151] "vegesu1" "frtlt1a" "veglt1a" "frt16a" "veg23a" "fruite1"
## [157] "vegete1" "totinda" "minac11" "minac21" "pacat1" "paindx1"
## [163] "pa150r2" "pa300r2" "pa30021" "pastrng" "parec1" "pastae1"
## [169] "rfseat2" "rfseat3" "flshot6" "pneumo2" "aidtst3" "mmsa"
## [175] "mmsawt" "seqno" "mmsaname"
##Recoding of variables
table(brfss_17$menthlth)
##
## 1 2 3 4 5 6 7 8 9 10
## 7526 11574 6897 3484 8490 973 3285 672 112 6041
## 11 12 13 14 15 16 17 18 19 20
## 42 467 72 1244 5567 110 84 130 8 3405
## 21 22 23 24 25 26 27 28 29 30
## 217 65 39 58 1169 49 98 329 198 12112
## 77 88 99
## 2433 152826 1099
brfss_17$mentaldays<-Recode(brfss_17$menthlth, recodes="88=0; 77=NA; 99=NA")
table(brfss_17$mentaldays)
##
## 0 1 2 3 4 5 6 7 8 9
## 152826 7526 11574 6897 3484 8490 973 3285 672 112
## 10 11 12 13 14 15 16 17 18 19
## 6041 42 467 72 1244 5567 110 84 130 8
## 20 21 22 23 24 25 26 27 28 29
## 3405 217 65 39 58 1169 49 98 329 198
## 30
## 12112
hist(brfss_17$mentaldays)
summary(brfss_17$mentaldays)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 3.54 2.00 30.00 3532
#The average number of mental days is about 4 days.
table(brfss_17$internet)
##
## 1 2 7 9
## 191714 36199 228 409
brfss_17$usage<-Recode(brfss_17$internet, recodes="1=1; 2=0; 7=NA; 9=NA; else=NA")
table(brfss_17$usage)
##
## 0 1
## 36199 191714
brfss_17$usage2<-Recode(brfss_17$usage, recodes="1='usage'; 0='no usage'; else=NA", as.factor=T)
brfss_17$usage2<-relevel(brfss_17$usage2, ref = 'usage')
table(brfss_17$usage2)
##
## usage no usage
## 191714 36199
table(brfss_17$sex)
##
## 1 2 9
## 102083 128633 159
brfss_17$sex2<-Recode(brfss_17$sex, recodes="1=1; 2=0; else=NA")
table(brfss_17$sex2)
##
## 0 1
## 128633 102083
brfss_17$sex2b<-Recode(brfss_17$sex2, recodes="1='male'; 0='female'; else=NA", as.factor=T)
brfss_17$sex2b<-relevel(brfss_17$sex2b, ref = 'male')
table(brfss_17$sex2b)
##
## male female
## 102083 128633
table(brfss_17$income2)
##
## 1 2 3 4 5 6 7 8 77 99
## 8193 8366 12695 15902 18533 25814 30049 71998 16588 21037
brfss_17$newincome<-Recode(brfss_17$income2, recodes="1:2=1; 3:4=2; 5=3; 6=4; 7=5; 8=6; 77=NA; 99=NA")
table(brfss_17$newincome)
##
## 1 2 3 4 5 6
## 16559 28597 18533 25814 30049 71998
brfss_17$newincome2<-Recode(brfss_17$newincome, recodes="1='less than 15k'; 2='less than 25k'; 3='less than 35k'; 4='less than 50k'; 5='less than 75k'; 6='more than 75k'; else=NA", as.factor=T)
brfss_17$newincome2<-relevel(brfss_17$newincome2, ref = 'more than 75k')
table(brfss_17$newincome2)
##
## more than 75k less than 15k less than 25k less than 35k less than 50k
## 71998 16559 28597 18533 25814
## less than 75k
## 30049
brfss_17$educa2<-Recode(brfss_17$educa, recodes="1:2='0 Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad'; 9=NA", as.factor=T)
brfss_17$educa2<-relevel(brfss_17$educa2, ref = '2hsgrad')
table(brfss_17$educa2)
##
## 2hsgrad 0 Prim 1somehs 3somecol 4colgrad
## 55951 5133 9493 62309 97008
sub<-brfss_17 %>%
select(mentaldays, mmsaname, educa2, sex2b, newincome2, usage2, mmsawt, ststr) %>%
filter(complete.cases(.))
library(dplyr)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.6.2
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
##Analysis
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data = sub)
#Poisson regression example
svyhist(~mentaldays, des)
svyby(~mentaldays, ~sex2b+newincome2, des, svymean, na.rm=T)
## sex2b newincome2 mentaldays se
## male.more than 75k male more than 75k 2.116263 0.05913501
## female.more than 75k female more than 75k 3.242266 0.08539947
## male.less than 15k male less than 15k 6.232165 0.24811463
## female.less than 15k female less than 15k 7.318468 0.23063069
## male.less than 25k male less than 25k 4.416692 0.16217125
## female.less than 25k female less than 25k 5.598148 0.15871908
## male.less than 35k male less than 35k 3.535873 0.15555073
## female.less than 35k female less than 35k 4.517988 0.16942096
## male.less than 50k male less than 50k 3.119848 0.14731679
## female.less than 50k female less than 50k 4.180192 0.14579417
## male.less than 75k male less than 75k 3.025882 0.13051997
## female.less than 75k female less than 75k 4.024994 0.13175401
#Both females and males that make less than 15k have higher mental days at 7 and 6 days respectively. Whereas, males who make more than 75k have about 2 mental days in the last 30 days.
svyby(~mentaldays, ~usage2, des, svymean, na.rm=T)
## usage2 mentaldays se
## usage usage 3.783900 0.04024322
## no usage no usage 4.186328 0.12495850
#Individuals who use interent have less mental days.
svyby(~mentaldays, ~educa2, des, svymean, na.rm=T)
## educa2 mentaldays se
## 2hsgrad 2hsgrad 4.315291 0.08358552
## 0 Prim 0 Prim 3.918243 0.24206102
## 1somehs 1somehs 5.426346 0.21144581
## 3somecol 3somecol 4.215175 0.07398868
## 4colgrad 4colgrad 2.750567 0.04092368
#Individuals who have graduated from college have the least number of mental health days at 2.75.
##Poisson glm fit to survey data
fit1<-svyglm(mentaldays~factor(sex2b)+factor(newincome2)+factor(usage2)+factor(educa2), design=des, family=poisson)
summary(fit1)
##
## Call:
## svyglm(formula = mentaldays ~ factor(sex2b) + factor(newincome2) +
## factor(usage2) + factor(educa2), design = des, family = poisson)
##
## Survey design:
## svydesign(ids = ~1, strata = ~ststr, weights = ~mmsawt, data = sub)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.95262 0.03164 30.112 < 2e-16 ***
## factor(sex2b)female 0.29127 0.02033 14.325 < 2e-16 ***
## factor(newincome2)less than 15k 0.89118 0.03672 24.269 < 2e-16 ***
## factor(newincome2)less than 25k 0.58017 0.03271 17.735 < 2e-16 ***
## factor(newincome2)less than 35k 0.35251 0.03709 9.505 < 2e-16 ***
## factor(newincome2)less than 50k 0.25202 0.03596 7.009 2.42e-12 ***
## factor(newincome2)less than 75k 0.22997 0.03349 6.868 6.55e-12 ***
## factor(usage2)no usage -0.23048 0.03256 -7.079 1.45e-12 ***
## factor(educa2)0 Prim -0.23056 0.06613 -3.486 0.00049 ***
## factor(educa2)1somehs 0.09782 0.04374 2.237 0.02532 *
## factor(educa2)3somecol 0.02255 0.02626 0.859 0.39047
## factor(educa2)4colgrad -0.24748 0.02777 -8.911 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 15.61299)
##
## Number of Fisher Scoring iterations: 6
#Females had higher mean counts of mental health days than males. All of the brackets for income also have higher mean counts of mental health days than someone who makes more thank 75k. Individuals who do not use the interent have less mean count of mental health days. In regards to education, those in primary school and college graduates have less mean count to have more mental health days than a hs grad.
round(exp(summary(fit1)$coef[-1, 1]), 3)
## factor(sex2b)female factor(newincome2)less than 15k
## 1.338 2.438
## factor(newincome2)less than 25k factor(newincome2)less than 35k
## 1.786 1.423
## factor(newincome2)less than 50k factor(newincome2)less than 75k
## 1.287 1.259
## factor(usage2)no usage factor(educa2)0 Prim
## 0.794 0.794
## factor(educa2)1somehs factor(educa2)3somecol
## 1.103 1.023
## factor(educa2)4colgrad
## 0.781
#Overdispersion
fit2<-glm(mentaldays~factor(sex2b)+factor(newincome2)+factor(usage2)+factor(educa2), data = brfss_17, family=poisson)
summary(fit2)
##
## Call:
## glm(formula = mentaldays ~ factor(sex2b) + factor(newincome2) +
## factor(usage2) + factor(educa2), family = poisson, data = brfss_17)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.5513 -2.5648 -2.1827 -0.4266 11.2853
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.778895 0.003919 198.74 <2e-16 ***
## factor(sex2b)female 0.250527 0.002541 98.58 <2e-16 ***
## factor(newincome2)less than 15k 1.170034 0.004285 273.04 <2e-16 ***
## factor(newincome2)less than 25k 0.797428 0.003973 200.74 <2e-16 ***
## factor(newincome2)less than 35k 0.517566 0.004674 110.73 <2e-16 ***
## factor(newincome2)less than 50k 0.367728 0.004335 84.82 <2e-16 ***
## factor(newincome2)less than 75k 0.271951 0.004201 64.73 <2e-16 ***
## factor(usage2)no usage -0.292809 0.003627 -80.74 <2e-16 ***
## factor(educa2)0 Prim -0.102556 0.008370 -12.25 <2e-16 ***
## factor(educa2)1somehs 0.138232 0.005581 24.77 <2e-16 ***
## factor(educa2)3somecol 0.043996 0.003251 13.53 <2e-16 ***
## factor(educa2)4colgrad -0.161409 0.003456 -46.70 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2049916 on 188385 degrees of freedom
## Residual deviance: 1914529 on 188374 degrees of freedom
## (42489 observations deleted due to missingness)
## AIC: 2149248
##
## Number of Fisher Scoring iterations: 7
scale<-sqrt(fit2$deviance/fit2$df.residual)
scale
## [1] 3.188016
1-pchisq(fit2$deviance, df = fit2$df.residual)
## [1] 0
#This model does not fit the data.
fit3<-glm(mentaldays~factor(sex2b)+factor(newincome2)+factor(usage2)+factor(educa2), data = brfss_17, family=quasipoisson)
summary(fit3)
##
## Call:
## glm(formula = mentaldays ~ factor(sex2b) + factor(newincome2) +
## factor(usage2) + factor(educa2), family = quasipoisson, data = brfss_17)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.5513 -2.5648 -2.1827 -0.4266 11.2853
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.77889 0.01566 49.748 < 2e-16 ***
## factor(sex2b)female 0.25053 0.01015 24.677 < 2e-16 ***
## factor(newincome2)less than 15k 1.17003 0.01712 68.346 < 2e-16 ***
## factor(newincome2)less than 25k 0.79743 0.01587 50.248 < 2e-16 ***
## factor(newincome2)less than 35k 0.51757 0.01867 27.718 < 2e-16 ***
## factor(newincome2)less than 50k 0.36773 0.01732 21.233 < 2e-16 ***
## factor(newincome2)less than 75k 0.27195 0.01678 16.203 < 2e-16 ***
## factor(usage2)no usage -0.29281 0.01449 -20.211 < 2e-16 ***
## factor(educa2)0 Prim -0.10256 0.03344 -3.067 0.002163 **
## factor(educa2)1somehs 0.13823 0.02230 6.200 5.66e-10 ***
## factor(educa2)3somecol 0.04400 0.01299 3.387 0.000707 ***
## factor(educa2)4colgrad -0.16141 0.01381 -11.690 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 15.95961)
##
## Null deviance: 2049916 on 188385 degrees of freedom
## Residual deviance: 1914529 on 188374 degrees of freedom
## (42489 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 7
#Dispersion is 15.95 which is high and should be addressed.
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
coeftest(fit2, vcov=vcovHC(fit2, type="HC1", cluster="ststr"))
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.778895 0.016049 48.5331 < 2.2e-16
## factor(sex2b)female 0.250527 0.010328 24.2572 < 2.2e-16
## factor(newincome2)less than 15k 1.170034 0.017327 67.5254 < 2.2e-16
## factor(newincome2)less than 25k 0.797428 0.016228 49.1383 < 2.2e-16
## factor(newincome2)less than 35k 0.517566 0.019027 27.2022 < 2.2e-16
## factor(newincome2)less than 50k 0.367728 0.017410 21.1222 < 2.2e-16
## factor(newincome2)less than 75k 0.271951 0.016733 16.2525 < 2.2e-16
## factor(usage2)no usage -0.292809 0.015283 -19.1589 < 2.2e-16
## factor(educa2)0 Prim -0.102556 0.036151 -2.8369 0.0045551
## factor(educa2)1somehs 0.138232 0.023003 6.0094 1.862e-09
## factor(educa2)3somecol 0.043996 0.013262 3.3173 0.0009088
## factor(educa2)4colgrad -0.161409 0.014183 -11.3805 < 2.2e-16
##
## (Intercept) ***
## factor(sex2b)female ***
## factor(newincome2)less than 15k ***
## factor(newincome2)less than 25k ***
## factor(newincome2)less than 35k ***
## factor(newincome2)less than 50k ***
## factor(newincome2)less than 75k ***
## factor(usage2)no usage ***
## factor(educa2)0 Prim **
## factor(educa2)1somehs ***
## factor(educa2)3somecol ***
## factor(educa2)4colgrad ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fit1)
##
## Call:
## svyglm(formula = mentaldays ~ factor(sex2b) + factor(newincome2) +
## factor(usage2) + factor(educa2), design = des, family = poisson)
##
## Survey design:
## svydesign(ids = ~1, strata = ~ststr, weights = ~mmsawt, data = sub)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.95262 0.03164 30.112 < 2e-16 ***
## factor(sex2b)female 0.29127 0.02033 14.325 < 2e-16 ***
## factor(newincome2)less than 15k 0.89118 0.03672 24.269 < 2e-16 ***
## factor(newincome2)less than 25k 0.58017 0.03271 17.735 < 2e-16 ***
## factor(newincome2)less than 35k 0.35251 0.03709 9.505 < 2e-16 ***
## factor(newincome2)less than 50k 0.25202 0.03596 7.009 2.42e-12 ***
## factor(newincome2)less than 75k 0.22997 0.03349 6.868 6.55e-12 ***
## factor(usage2)no usage -0.23048 0.03256 -7.079 1.45e-12 ***
## factor(educa2)0 Prim -0.23056 0.06613 -3.486 0.00049 ***
## factor(educa2)1somehs 0.09782 0.04374 2.237 0.02532 *
## factor(educa2)3somecol 0.02255 0.02626 0.859 0.39047
## factor(educa2)4colgrad -0.24748 0.02777 -8.911 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 15.61299)
##
## Number of Fisher Scoring iterations: 6
#They are similar but not identical.
#Fit the Negative Biomial GLM
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(MASS)
fit.nb1<-glm.nb(mentaldays~factor(newincome2), data = sub, weights=mmsawt/mean(mmsawt, na.rm=T))
fit.nb2<-glm.nb(mentaldays~factor(newincome2)+factor(usage2)+factor(educa2), data = sub, weights=mmsawt/mean(mmsawt, na.rm=T))
tests1<-coeftest(fit.nb1, vcov=vcovHC(fit.nb2, type = "HC1", cluster="ststr"))
tests<-coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type = "HC1", cluster="ststr"))
library(stargazer)
stargazer(fit.nb1, fit.nb2, style = "demography", type="text", t.auto=F, p.auto=F, coef=list(tests1[,1], tests[,1]), se=list(tests1[, 2], tests[,2]), p=list(tests1[,4], tests[,4]))
##
## -----------------------------------------------------------------
## mentaldays
## Model 1 Model 2
## -----------------------------------------------------------------
## factor(newincome2)less than 15k 0.959*** 0.935***
## (0.039) (0.039)
## factor(newincome2)less than 25k 0.651*** 0.613***
## (0.034) (0.034)
## factor(newincome2)less than 35k 0.432*** 0.394***
## (0.037) (0.037)
## factor(newincome2)less than 50k 0.321*** 0.266***
## (0.035) (0.035)
## factor(newincome2)less than 75k 0.288*** 0.248***
## (0.033) (0.033)
## factor(usage2)no usage -0.247***
## (0.033)
## factor(educa2)0 Prim -0.185*
## (0.073)
## factor(educa2)1somehs 0.105*
## (0.046)
## factor(educa2)3somecol 0.024
## (0.028)
## factor(educa2)4colgrad -0.243***
## (0.029)
## Constant 0.970*** 1.089***
## (0.031) (0.031)
## N 188,386 188,386
## Log Likelihood -351,719.800 -351,458.600
## theta 0.145*** (0.001) 0.146*** (0.001)
## AIC 703,451.500 702,939.200
## -----------------------------------------------------------------
## *p < .05; **p < .01; ***p < .001
#Model 2 is a better fit because the AIC value is lower.