#All of the variables in this dataset are from brfss 2017. The relationship between gender, education level, internet usage, income level and mental health days will be analyzed.

#The count outcome variable is number of days suffering from a mental health in the last 30 days.

#Research Question 1. Is higher level of income associated with less number of days suffering from mental health? 2. Are individuals who donโ€™t use the internet more likely to have less number of days suffering from mental health? 3. Are individuals with higher education more likely to have less mental health days?

#A offset term is not necessary because all respondents were asked in the last 30 days. There was no variation.

library(haven)
library(readr)
library (car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
setwd('C:/Users/tim1203/Desktop/utsa/C. Sparks/Dem 7283/Data sets')
load("brfss_2017.rdata")
library(questionr)
## Warning: package 'questionr' was built under R version 3.6.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
names(brfss_17)
##   [1] "dispcode" "statere1" "safetime" "hhadult"  "genhlth"  "physhlth"
##   [7] "menthlth" "poorhlth" "hlthpln1" "persdoc2" "medcost"  "checkup1"
##  [13] "bphigh4"  "bpmeds"   "cholchk1" "toldhi2"  "cholmed1" "cvdinfr4"
##  [19] "cvdcrhd4" "cvdstrk3" "asthma3"  "asthnow"  "chcscncr" "chcocncr"
##  [25] "chccopd1" "havarth3" "addepev2" "chckidny" "diabete3" "diabage2"
##  [31] "lmtjoin3" "arthdis2" "arthsocl" "joinpai1" "sex"      "marital" 
##  [37] "educa"    "renthom1" "numhhol2" "numphon2" "cpdemo1a" "veteran3"
##  [43] "employ1"  "children" "income2"  "internet" "weight2"  "height3" 
##  [49] "pregnant" "deaf"     "blind"    "decide"   "diffwalk" "diffdres"
##  [55] "diffalon" "smoke100" "smokday2" "stopsmk2" "lastsmk2" "usenow3" 
##  [61] "ecigaret" "ecignow"  "alcday5"  "avedrnk2" "drnk3ge5" "maxdrnks"
##  [67] "fruit2"   "fruitju2" "fvgreen1" "frenchf1" "potatoe1" "vegetab2"
##  [73] "exerany2" "exract11" "exeroft1" "exerhmm1" "exract21" "exeroft2"
##  [79] "exerhmm2" "strength" "seatbelt" "flushot6" "flshtmy2" "pneuvac3"
##  [85] "shingle2" "hivtst6"  "hivtstd3" "hivrisk5" "casthdx2" "casthno2"
##  [91] "callbckz" "wdusenow" "wdinftrk" "wdhowoft" "wdshare"  "namtribe"
##  [97] "namothr"  "urbnrrl"  "ststr"    "impsex"   "rfhlth"   "phys14d" 
## [103] "ment14d"  "hcvu651"  "rfhype5"  "cholch1"  "rfchol1"  "michd"   
## [109] "ltasth1"  "casthm1"  "asthms1"  "drdxar1"  "lmtact1"  "lmtwrk1" 
## [115] "lmtscl1"  "prace1"   "mrace1"   "hispanc"  "race"     "raceg21" 
## [121] "racegr3"  "ageg5yr"  "age65yr"  "age80"    "ageg"     "wtkg3"   
## [127] "bmi5"     "bmi5cat"  "rfbmi5"   "educag"   "incomg"   "smoker3" 
## [133] "rfsmok3"  "ecigsts"  "curecig"  "drnkany5" "rfbing5"  "drnkwek" 
## [139] "rfdrhv5"  "ftjuda2"  "frutda2"  "grenda1"  "frnchda"  "potada1" 
## [145] "vegeda2"  "misfrt1"  "misveg1"  "frtres1"  "vegres1"  "frutsu1" 
## [151] "vegesu1"  "frtlt1a"  "veglt1a"  "frt16a"   "veg23a"   "fruite1" 
## [157] "vegete1"  "totinda"  "minac11"  "minac21"  "pacat1"   "paindx1" 
## [163] "pa150r2"  "pa300r2"  "pa30021"  "pastrng"  "parec1"   "pastae1" 
## [169] "rfseat2"  "rfseat3"  "flshot6"  "pneumo2"  "aidtst3"  "mmsa"    
## [175] "mmsawt"   "seqno"    "mmsaname"

##Recoding of variables

table(brfss_17$menthlth)
## 
##      1      2      3      4      5      6      7      8      9     10 
##   7526  11574   6897   3484   8490    973   3285    672    112   6041 
##     11     12     13     14     15     16     17     18     19     20 
##     42    467     72   1244   5567    110     84    130      8   3405 
##     21     22     23     24     25     26     27     28     29     30 
##    217     65     39     58   1169     49     98    329    198  12112 
##     77     88     99 
##   2433 152826   1099
brfss_17$mentaldays<-Recode(brfss_17$menthlth, recodes="88=0; 77=NA; 99=NA")
table(brfss_17$mentaldays)
## 
##      0      1      2      3      4      5      6      7      8      9 
## 152826   7526  11574   6897   3484   8490    973   3285    672    112 
##     10     11     12     13     14     15     16     17     18     19 
##   6041     42    467     72   1244   5567    110     84    130      8 
##     20     21     22     23     24     25     26     27     28     29 
##   3405    217     65     39     58   1169     49     98    329    198 
##     30 
##  12112
hist(brfss_17$mentaldays)

summary(brfss_17$mentaldays)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    3.54    2.00   30.00    3532

#The average number of mental days is about 4 days.

table(brfss_17$internet)
## 
##      1      2      7      9 
## 191714  36199    228    409
brfss_17$usage<-Recode(brfss_17$internet, recodes="1=1; 2=0; 7=NA; 9=NA; else=NA")
table(brfss_17$usage)
## 
##      0      1 
##  36199 191714
brfss_17$usage2<-Recode(brfss_17$usage, recodes="1='usage'; 0='no usage'; else=NA", as.factor=T)
brfss_17$usage2<-relevel(brfss_17$usage2, ref = 'usage')
table(brfss_17$usage2)
## 
##    usage no usage 
##   191714    36199
table(brfss_17$sex)
## 
##      1      2      9 
## 102083 128633    159
brfss_17$sex2<-Recode(brfss_17$sex, recodes="1=1; 2=0; else=NA")
table(brfss_17$sex2)
## 
##      0      1 
## 128633 102083
brfss_17$sex2b<-Recode(brfss_17$sex2, recodes="1='male'; 0='female'; else=NA", as.factor=T)
brfss_17$sex2b<-relevel(brfss_17$sex2b, ref = 'male')
table(brfss_17$sex2b)
## 
##   male female 
## 102083 128633
table(brfss_17$income2)
## 
##     1     2     3     4     5     6     7     8    77    99 
##  8193  8366 12695 15902 18533 25814 30049 71998 16588 21037
brfss_17$newincome<-Recode(brfss_17$income2, recodes="1:2=1; 3:4=2; 5=3; 6=4; 7=5; 8=6; 77=NA; 99=NA")
table(brfss_17$newincome)
## 
##     1     2     3     4     5     6 
## 16559 28597 18533 25814 30049 71998
brfss_17$newincome2<-Recode(brfss_17$newincome, recodes="1='less than 15k'; 2='less than 25k'; 3='less than 35k'; 4='less than 50k'; 5='less than 75k'; 6='more than 75k'; else=NA", as.factor=T)
brfss_17$newincome2<-relevel(brfss_17$newincome2, ref = 'more than 75k')
table(brfss_17$newincome2)
## 
## more than 75k less than 15k less than 25k less than 35k less than 50k 
##         71998         16559         28597         18533         25814 
## less than 75k 
##         30049
brfss_17$educa2<-Recode(brfss_17$educa, recodes="1:2='0 Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad'; 9=NA", as.factor=T)
brfss_17$educa2<-relevel(brfss_17$educa2, ref = '2hsgrad')
table(brfss_17$educa2)
## 
##  2hsgrad   0 Prim  1somehs 3somecol 4colgrad 
##    55951     5133     9493    62309    97008
sub<-brfss_17 %>% 
  select(mentaldays, mmsaname, educa2, sex2b, newincome2, usage2, mmsawt, ststr) %>% 
  filter(complete.cases(.))
library(dplyr)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.6.2
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart

##Analysis

options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data = sub)

#Poisson regression example

svyhist(~mentaldays, des)

svyby(~mentaldays, ~sex2b+newincome2, des, svymean, na.rm=T)
##                       sex2b    newincome2 mentaldays         se
## male.more than 75k     male more than 75k   2.116263 0.05913501
## female.more than 75k female more than 75k   3.242266 0.08539947
## male.less than 15k     male less than 15k   6.232165 0.24811463
## female.less than 15k female less than 15k   7.318468 0.23063069
## male.less than 25k     male less than 25k   4.416692 0.16217125
## female.less than 25k female less than 25k   5.598148 0.15871908
## male.less than 35k     male less than 35k   3.535873 0.15555073
## female.less than 35k female less than 35k   4.517988 0.16942096
## male.less than 50k     male less than 50k   3.119848 0.14731679
## female.less than 50k female less than 50k   4.180192 0.14579417
## male.less than 75k     male less than 75k   3.025882 0.13051997
## female.less than 75k female less than 75k   4.024994 0.13175401

#Both females and males that make less than 15k have higher mental days at 7 and 6 days respectively. Whereas, males who make more than 75k have about 2 mental days in the last 30 days.

svyby(~mentaldays, ~usage2, des, svymean, na.rm=T)
##            usage2 mentaldays         se
## usage       usage   3.783900 0.04024322
## no usage no usage   4.186328 0.12495850

#Individuals who use interent have less mental days.

svyby(~mentaldays, ~educa2, des, svymean, na.rm=T)
##            educa2 mentaldays         se
## 2hsgrad   2hsgrad   4.315291 0.08358552
## 0 Prim     0 Prim   3.918243 0.24206102
## 1somehs   1somehs   5.426346 0.21144581
## 3somecol 3somecol   4.215175 0.07398868
## 4colgrad 4colgrad   2.750567 0.04092368

#Individuals who have graduated from college have the least number of mental health days at 2.75.

##Poisson glm fit to survey data

fit1<-svyglm(mentaldays~factor(sex2b)+factor(newincome2)+factor(usage2)+factor(educa2), design=des, family=poisson)
summary(fit1)
## 
## Call:
## svyglm(formula = mentaldays ~ factor(sex2b) + factor(newincome2) + 
##     factor(usage2) + factor(educa2), design = des, family = poisson)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~ststr, weights = ~mmsawt, data = sub)
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      0.95262    0.03164  30.112  < 2e-16 ***
## factor(sex2b)female              0.29127    0.02033  14.325  < 2e-16 ***
## factor(newincome2)less than 15k  0.89118    0.03672  24.269  < 2e-16 ***
## factor(newincome2)less than 25k  0.58017    0.03271  17.735  < 2e-16 ***
## factor(newincome2)less than 35k  0.35251    0.03709   9.505  < 2e-16 ***
## factor(newincome2)less than 50k  0.25202    0.03596   7.009 2.42e-12 ***
## factor(newincome2)less than 75k  0.22997    0.03349   6.868 6.55e-12 ***
## factor(usage2)no usage          -0.23048    0.03256  -7.079 1.45e-12 ***
## factor(educa2)0 Prim            -0.23056    0.06613  -3.486  0.00049 ***
## factor(educa2)1somehs            0.09782    0.04374   2.237  0.02532 *  
## factor(educa2)3somecol           0.02255    0.02626   0.859  0.39047    
## factor(educa2)4colgrad          -0.24748    0.02777  -8.911  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 15.61299)
## 
## Number of Fisher Scoring iterations: 6

#Females had higher mean counts of mental health days than males. All of the brackets for income also have higher mean counts of mental health days than someone who makes more thank 75k. Individuals who do not use the interent have less mean count of mental health days. In regards to education, those in primary school and college graduates have less mean count to have more mental health days than a hs grad.

round(exp(summary(fit1)$coef[-1, 1]), 3)
##             factor(sex2b)female factor(newincome2)less than 15k 
##                           1.338                           2.438 
## factor(newincome2)less than 25k factor(newincome2)less than 35k 
##                           1.786                           1.423 
## factor(newincome2)less than 50k factor(newincome2)less than 75k 
##                           1.287                           1.259 
##          factor(usage2)no usage            factor(educa2)0 Prim 
##                           0.794                           0.794 
##           factor(educa2)1somehs          factor(educa2)3somecol 
##                           1.103                           1.023 
##          factor(educa2)4colgrad 
##                           0.781

#Overdispersion

fit2<-glm(mentaldays~factor(sex2b)+factor(newincome2)+factor(usage2)+factor(educa2), data = brfss_17, family=poisson)
summary(fit2)
## 
## Call:
## glm(formula = mentaldays ~ factor(sex2b) + factor(newincome2) + 
##     factor(usage2) + factor(educa2), family = poisson, data = brfss_17)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.5513  -2.5648  -2.1827  -0.4266  11.2853  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      0.778895   0.003919  198.74   <2e-16 ***
## factor(sex2b)female              0.250527   0.002541   98.58   <2e-16 ***
## factor(newincome2)less than 15k  1.170034   0.004285  273.04   <2e-16 ***
## factor(newincome2)less than 25k  0.797428   0.003973  200.74   <2e-16 ***
## factor(newincome2)less than 35k  0.517566   0.004674  110.73   <2e-16 ***
## factor(newincome2)less than 50k  0.367728   0.004335   84.82   <2e-16 ***
## factor(newincome2)less than 75k  0.271951   0.004201   64.73   <2e-16 ***
## factor(usage2)no usage          -0.292809   0.003627  -80.74   <2e-16 ***
## factor(educa2)0 Prim            -0.102556   0.008370  -12.25   <2e-16 ***
## factor(educa2)1somehs            0.138232   0.005581   24.77   <2e-16 ***
## factor(educa2)3somecol           0.043996   0.003251   13.53   <2e-16 ***
## factor(educa2)4colgrad          -0.161409   0.003456  -46.70   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 2049916  on 188385  degrees of freedom
## Residual deviance: 1914529  on 188374  degrees of freedom
##   (42489 observations deleted due to missingness)
## AIC: 2149248
## 
## Number of Fisher Scoring iterations: 7
scale<-sqrt(fit2$deviance/fit2$df.residual)
scale
## [1] 3.188016
1-pchisq(fit2$deviance, df = fit2$df.residual)
## [1] 0

#This model does not fit the data.

fit3<-glm(mentaldays~factor(sex2b)+factor(newincome2)+factor(usage2)+factor(educa2), data = brfss_17, family=quasipoisson)
summary(fit3)
## 
## Call:
## glm(formula = mentaldays ~ factor(sex2b) + factor(newincome2) + 
##     factor(usage2) + factor(educa2), family = quasipoisson, data = brfss_17)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.5513  -2.5648  -2.1827  -0.4266  11.2853  
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      0.77889    0.01566  49.748  < 2e-16 ***
## factor(sex2b)female              0.25053    0.01015  24.677  < 2e-16 ***
## factor(newincome2)less than 15k  1.17003    0.01712  68.346  < 2e-16 ***
## factor(newincome2)less than 25k  0.79743    0.01587  50.248  < 2e-16 ***
## factor(newincome2)less than 35k  0.51757    0.01867  27.718  < 2e-16 ***
## factor(newincome2)less than 50k  0.36773    0.01732  21.233  < 2e-16 ***
## factor(newincome2)less than 75k  0.27195    0.01678  16.203  < 2e-16 ***
## factor(usage2)no usage          -0.29281    0.01449 -20.211  < 2e-16 ***
## factor(educa2)0 Prim            -0.10256    0.03344  -3.067 0.002163 ** 
## factor(educa2)1somehs            0.13823    0.02230   6.200 5.66e-10 ***
## factor(educa2)3somecol           0.04400    0.01299   3.387 0.000707 ***
## factor(educa2)4colgrad          -0.16141    0.01381 -11.690  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 15.95961)
## 
##     Null deviance: 2049916  on 188385  degrees of freedom
## Residual deviance: 1914529  on 188374  degrees of freedom
##   (42489 observations deleted due to missingness)
## AIC: NA
## 
## Number of Fisher Scoring iterations: 7

#Dispersion is 15.95 which is high and should be addressed.

library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)
coeftest(fit2, vcov=vcovHC(fit2, type="HC1", cluster="ststr"))
## 
## z test of coefficients:
## 
##                                  Estimate Std. Error  z value  Pr(>|z|)
## (Intercept)                      0.778895   0.016049  48.5331 < 2.2e-16
## factor(sex2b)female              0.250527   0.010328  24.2572 < 2.2e-16
## factor(newincome2)less than 15k  1.170034   0.017327  67.5254 < 2.2e-16
## factor(newincome2)less than 25k  0.797428   0.016228  49.1383 < 2.2e-16
## factor(newincome2)less than 35k  0.517566   0.019027  27.2022 < 2.2e-16
## factor(newincome2)less than 50k  0.367728   0.017410  21.1222 < 2.2e-16
## factor(newincome2)less than 75k  0.271951   0.016733  16.2525 < 2.2e-16
## factor(usage2)no usage          -0.292809   0.015283 -19.1589 < 2.2e-16
## factor(educa2)0 Prim            -0.102556   0.036151  -2.8369 0.0045551
## factor(educa2)1somehs            0.138232   0.023003   6.0094 1.862e-09
## factor(educa2)3somecol           0.043996   0.013262   3.3173 0.0009088
## factor(educa2)4colgrad          -0.161409   0.014183 -11.3805 < 2.2e-16
##                                    
## (Intercept)                     ***
## factor(sex2b)female             ***
## factor(newincome2)less than 15k ***
## factor(newincome2)less than 25k ***
## factor(newincome2)less than 35k ***
## factor(newincome2)less than 50k ***
## factor(newincome2)less than 75k ***
## factor(usage2)no usage          ***
## factor(educa2)0 Prim            ** 
## factor(educa2)1somehs           ***
## factor(educa2)3somecol          ***
## factor(educa2)4colgrad          ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fit1)
## 
## Call:
## svyglm(formula = mentaldays ~ factor(sex2b) + factor(newincome2) + 
##     factor(usage2) + factor(educa2), design = des, family = poisson)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~ststr, weights = ~mmsawt, data = sub)
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      0.95262    0.03164  30.112  < 2e-16 ***
## factor(sex2b)female              0.29127    0.02033  14.325  < 2e-16 ***
## factor(newincome2)less than 15k  0.89118    0.03672  24.269  < 2e-16 ***
## factor(newincome2)less than 25k  0.58017    0.03271  17.735  < 2e-16 ***
## factor(newincome2)less than 35k  0.35251    0.03709   9.505  < 2e-16 ***
## factor(newincome2)less than 50k  0.25202    0.03596   7.009 2.42e-12 ***
## factor(newincome2)less than 75k  0.22997    0.03349   6.868 6.55e-12 ***
## factor(usage2)no usage          -0.23048    0.03256  -7.079 1.45e-12 ***
## factor(educa2)0 Prim            -0.23056    0.06613  -3.486  0.00049 ***
## factor(educa2)1somehs            0.09782    0.04374   2.237  0.02532 *  
## factor(educa2)3somecol           0.02255    0.02626   0.859  0.39047    
## factor(educa2)4colgrad          -0.24748    0.02777  -8.911  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 15.61299)
## 
## Number of Fisher Scoring iterations: 6

#They are similar but not identical.

#Fit the Negative Biomial GLM

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(MASS)
fit.nb1<-glm.nb(mentaldays~factor(newincome2), data = sub, weights=mmsawt/mean(mmsawt, na.rm=T))
fit.nb2<-glm.nb(mentaldays~factor(newincome2)+factor(usage2)+factor(educa2), data = sub, weights=mmsawt/mean(mmsawt, na.rm=T))
tests1<-coeftest(fit.nb1, vcov=vcovHC(fit.nb2, type = "HC1", cluster="ststr"))
tests<-coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type = "HC1", cluster="ststr"))
library(stargazer)
stargazer(fit.nb1, fit.nb2, style = "demography", type="text", t.auto=F, p.auto=F, coef=list(tests1[,1], tests[,1]), se=list(tests1[, 2], tests[,2]), p=list(tests1[,4], tests[,4]))
## 
## -----------------------------------------------------------------
##                                            mentaldays            
##                                     Model 1          Model 2     
## -----------------------------------------------------------------
## factor(newincome2)less than 15k     0.959***         0.935***    
##                                     (0.039)          (0.039)     
## factor(newincome2)less than 25k     0.651***         0.613***    
##                                     (0.034)          (0.034)     
## factor(newincome2)less than 35k     0.432***         0.394***    
##                                     (0.037)          (0.037)     
## factor(newincome2)less than 50k     0.321***         0.266***    
##                                     (0.035)          (0.035)     
## factor(newincome2)less than 75k     0.288***         0.248***    
##                                     (0.033)          (0.033)     
## factor(usage2)no usage                              -0.247***    
##                                                      (0.033)     
## factor(educa2)0 Prim                                 -0.185*     
##                                                      (0.073)     
## factor(educa2)1somehs                                 0.105*     
##                                                      (0.046)     
## factor(educa2)3somecol                                0.024      
##                                                      (0.028)     
## factor(educa2)4colgrad                              -0.243***    
##                                                      (0.029)     
## Constant                            0.970***         1.089***    
##                                     (0.031)          (0.031)     
## N                                   188,386          188,386     
## Log Likelihood                    -351,719.800     -351,458.600  
## theta                           0.145*** (0.001) 0.146*** (0.001)
## AIC                               703,451.500      702,939.200   
## -----------------------------------------------------------------
## *p < .05; **p < .01; ***p < .001

#Model 2 is a better fit because the AIC value is lower.