#load brfss
library(car)
library(stargazer)
library(survey)
library(sjPlot)
library(ggplot2)
#load data
library(haven)
nhis_4 <- read_stata("nhis_00004.dta.gz")
View(nhis_4)
nhis_4$ervisit<-Recode(nhis_4$eryrno, recodes = "20=1;30=2;41=3;40=4;42=5;43=6;50=7;60=8;61:62=9;00=NA;97:99=NA;10=NA")
hist(nhis_4$ervisit)

summary(nhis_4$ervisit)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.0 1.0 1.0 1.6 2.0 9.0 102684
#poor
nhis_4$poor<-Recode(nhis_4$pooryn, recodes =" 1=1;2=0;else=NA",as.factor=F)
##Alcohol
nhis_4$alcohol<-Recode(nhis_4$alclife, recodes ="1=1;2=0;else=NA")
##smoking
nhis_4$smoke<-Recode(nhis_4$smokev, recodes ="7:9=NA;1=1;2=0; else=NA")
##usborn
nhis_4$born<-Recode(nhis_4$usborn, recodes ="else=NA; 20=1;10:12=0")
##Recode
##citizen
nhis_4$mycitizen<-Recode(nhis_4$citizen, recodes ="7:9=NA; 1=1;2=0")
#Age cut into intervals
nhis_4$agec<-cut(nhis_4$age,breaks = c(18, 25, 35, 45, 55, 65, 75, 85), include.lowest = T )
- Define a count outcome for the dataset of your choosing
##I chose ER visits
- State a research question about your outcome
##The question is what is the number of times you visited the ER in the past 12 months
- Is an offset term necessary? why or why not?
##No the offset term is not necessary because the parameter is within a certain number of visits and in the 12 months
- Consider a Poisson regression model for the outcome
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
sub<-nhis_4%>%
select(alcohol, citizen,usborn,
agec,ervisit,strata,psu, perweight, smoke) %>%
filter( complete.cases(.))
#First we tell R our survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~strata, weights=~perweight, data =sub )
#OR THE BRFSS, R GAVE ME A WARNING AND I NEEDED TO ADD:
#YOU MAY NOT NEED TO DO THIS!!!!
#First we tell R our survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~strata,
weights=~perweight,
data = nhis_4[is.na(nhis_4$perweight)==F,])
- Evaluate the level of dispersion in the outcome
#First I do some simple descriptives
svyhist(~ervisit, des)

svyby(~ervisit, ~usborn+smoke, des, svymean, na.rm=T)
## usborn smoke ervisit se
## 11.0 11 0 1.792929 0.19732991
## 12.0 12 0 1.502240 0.08744699
## 20.0 20 0 1.696059 0.02940609
## 97.0 97 0 1.000000 0.00000000
## 99.0 99 0 0.000000 0.00000000
## 11.1 11 1 2.031835 0.38265859
## 12.1 12 1 1.476708 0.04524301
## 20.1 20 1 1.579261 0.02494510
## 97.1 97 1 0.000000 0.00000000
## 99.1 99 1 2.000000 0.00000000
svyby(~ervisit, ~agec, des, svymean, na.rm=T)
## agec ervisit se
## [18,25] [18,25] 1.748836 0.06878496
## (25,35] (25,35] 1.636463 0.04171821
## (35,45] (35,45] 1.673345 0.05258919
## (45,55] (45,55] 1.697424 0.04769689
## (55,65] (55,65] 1.640019 0.04160352
## (65,75] (65,75] 1.455516 0.03446322
## (75,85] (75,85] 1.500475 0.03760934
#Poisson glm fit to survey data
fit1<-svyglm(ervisit~factor(usborn)+factor(smoke)+factor(agec), design=des, family=poisson)
summary(fit1)
##
## Call:
## svyglm(formula = ervisit ~ factor(usborn) + factor(smoke) + factor(agec),
## design = des, family = poisson)
##
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~perweight, data = nhis_4[is.na(nhis_4$perweight) ==
## F, ])
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.77918 0.12794 6.090 1.19e-09 ***
## factor(usborn)12 -0.24915 0.12035 -2.070 0.038471 *
## factor(usborn)20 -0.16630 0.11746 -1.416 0.156907
## factor(usborn)97 -0.73396 0.11922 -6.156 7.89e-10 ***
## factor(usborn)99 0.03849 0.12040 0.320 0.749215
## factor(smoke)1 -0.07313 0.02284 -3.202 0.001370 **
## factor(agec)(25,35] -0.07497 0.04710 -1.592 0.111529
## factor(agec)(35,45] -0.05139 0.05077 -1.012 0.311473
## factor(agec)(45,55] -0.04523 0.04934 -0.917 0.359422
## factor(agec)(55,65] -0.07972 0.04734 -1.684 0.092178 .
## factor(agec)(65,75] -0.19916 0.04681 -4.255 2.12e-05 ***
## factor(agec)(75,85] -0.16298 0.04699 -3.468 0.000527 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 0.9280652)
##
## Number of Fisher Scoring iterations: 5
#here are the poisson model "risk ratios", which just show the change in the mean
round(exp(summary(fit1)$coef[-1,1]), 3)
## factor(usborn)12 factor(usborn)20 factor(usborn)97 factor(usborn)99
## 0.779 0.847 0.480 1.039
## factor(smoke)1 factor(agec)(25,35] factor(agec)(35,45] factor(agec)(45,55]
## 0.929 0.928 0.950 0.956
## factor(agec)(55,65] factor(agec)(65,75] factor(agec)(75,85]
## 0.923 0.819 0.850
- Is the Poisson model a good choice?
##Checking for over dispersion
fit2<-glm(ervisit~factor(usborn)+factor(smoke)+factor(poor), data=nhis_4, family=poisson)
summary(fit2)
##
## Call:
## glm(formula = ervisit ~ factor(usborn) + factor(smoke) + factor(poor),
## family = poisson, data = nhis_4)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9537 -0.5147 -0.4485 0.2989 4.3401
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.82333 0.08219 10.017 < 2e-16 ***
## factor(usborn)12 -0.21922 0.08597 -2.550 0.01078 *
## factor(usborn)20 -0.10274 0.08198 -1.253 0.21012
## factor(smoke)1 -0.05677 0.01923 -2.953 0.00315 **
## factor(poor)1 -0.24647 0.02048 -12.037 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 4559.3 on 6740 degrees of freedom
## Residual deviance: 4386.5 on 6736 degrees of freedom
## (105312 observations deleted due to missingness)
## AIC: 19946
##
## Number of Fisher Scoring iterations: 5
scale<-sqrt(fit2$deviance/fit2$df.residual)
scale
## [1] 0.8069699
##Using the chi square distribution
1-pchisq(fit2$deviance, df = fit2$df.residual)
## [1] 1
##The model fits the data since the p value is one
##Quasi distribution
fit3<-glm(ervisit~factor(usborn)+factor(smoke)+factor(agec), data=nhis_4, family=quasipoisson)
summary(fit3)
##
## Call:
## glm(formula = ervisit ~ factor(usborn) + factor(smoke) + factor(agec),
## family = quasipoisson, data = nhis_4)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.8904 -0.5718 -0.4740 0.2352 4.3814
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.77570 0.08525 9.099 < 2e-16 ***
## factor(usborn)12 -0.25506 0.08433 -3.025 0.0025 **
## factor(usborn)20 -0.15773 0.08044 -1.961 0.0499 *
## factor(usborn)97 -0.76119 0.99271 -0.767 0.4432
## factor(usborn)99 0.05090 0.70441 0.072 0.9424
## factor(smoke)1 -0.08135 0.01876 -4.338 1.46e-05 ***
## factor(agec)(25,35] -0.03480 0.03434 -1.013 0.3109
## factor(agec)(35,45] -0.05210 0.03607 -1.444 0.1487
## factor(agec)(45,55] -0.01452 0.03461 -0.419 0.6749
## factor(agec)(55,65] -0.07198 0.03492 -2.061 0.0393 *
## factor(agec)(65,75] -0.16193 0.03755 -4.313 1.64e-05 ***
## factor(agec)(75,85] -0.16129 0.03806 -4.238 2.28e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.9786789)
##
## Null deviance: 4742.4 on 7044 degrees of freedom
## Residual deviance: 4667.1 on 7033 degrees of freedom
## (105008 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
- Consider a Negative binomial model
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
coeftest(fit2, vcov=vcovHC(fit2, type="HC1",cluster="ststr"))
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.823326 0.093061 8.8471 < 2.2e-16 ***
## factor(usborn)12 -0.219217 0.096802 -2.2646 0.023537 *
## factor(usborn)20 -0.102739 0.093926 -1.0938 0.274026
## factor(smoke)1 -0.056769 0.018835 -3.0141 0.002578 **
## factor(poor)1 -0.246474 0.022273 -11.0662 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Fit the Negative Binomial GLM
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
fit.nb1<-glm.nb(ervisit~factor(agec),
data=sub,
weights=perweight/mean(perweight, na.rm=T))
fit.nb2<-glm.nb(ervisit~factor(usborn)+factor(smoke)+factor(agec),
data=sub,
weights=perweight/mean(perweight, na.rm=T))
- Compare the model fits of the alternative models using AIC
#clx2(fit.nb2,cluster =sub$ststr)
tests1<-coeftest(fit.nb1, vcov=vcovHC(fit.nb2, type="HC1",cluster="ststr"))
tests<-coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type="HC1",cluster="ststr"))
library(stargazer)
stargazer(fit.nb1, fit.nb2,style="demography", type = "text", t.auto=F,p.auto=F,coef=list(tests1[, 1],tests[,1]), se =list(tests1[, 2], tests[, 2]), p=list(tests1[,4],tests[, 4]) )
##
## ---------------------------------------------------
## ervisit
## Model 1 Model 2
## ---------------------------------------------------
## factor(usborn)12 -0.334
## (0.179)
## factor(usborn)20 -0.245
## (0.175)
## factor(usborn)97 -0.961***
## (0.179)
## factor(usborn)99 -0.149
## (0.180)
## factor(smoke)1 -0.110**
## (0.039)
## factor(agec)(25,35] -0.008 -0.011
## (0.078) (0.078)
## factor(agec)(35,45] 0.019 0.013
## (0.084) (0.084)
## factor(agec)(45,55] 0.046 0.021
## (0.075) (0.075)
## factor(agec)(55,65] -0.046 -0.077
## (0.070) (0.070)
## factor(agec)(65,75] -0.228*** -0.254***
## (0.069) (0.069)
## factor(agec)(75,85] -0.153* -0.170**
## (0.065) (0.065)
## Constant 0.599** 0.940***
## (0.195) (0.195)
## N 2,818 2,818
## Log Likelihood -4,296.337 -4,283.931
## theta 51.768 (29.248) 59.722 (38.498)
## AIC 8,606.675 8,591.861
## ---------------------------------------------------
## *p < .05; **p < .01; ***p < .001
##Based on the AIC, model 2(16,780.780) is better compared to model one(16,780.780 )
##
AIC(fit.nb1) ##Negative model 1
## [1] 8606.675
AIC(fit.nb2) ##Negative model 2
## [1] 8591.861
AIC(fit1) ##Poisson model 1
## eff.p AIC deltabar
## 12.195393 4471.971650 1.108672
AIC(fit2) ##Poisson model 2
## [1] 19946.48
AIC(fit3) ##Poisson model 3
## [1] NA