#load brfss
library(car)
library(stargazer)
library(survey)
library(sjPlot)
library(ggplot2)
library(pander)
library(knitr)
load("~/CSparks_StatsII/brfss16_mmsa.Rdata")
set.seed(12345)
#samps<-sample(1:nrow(brfss16m), size = 40000, replace=F)
#brfss16m<-brfss16m[samps,]
#The names in the data are very ugly, so I make them less ugly
nams<-names(brfss16m)
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-gsub(pattern = "_",replacement = "",x = nams)
names(brfss16m)<-tolower(newnames)
#sex
brfss16m$female<-ifelse(brfss16m$sex==2, 1, 0)
#BMI
brfss16m$bmi<-ifelse(is.na(brfss16m$bmi5)==T, NA, brfss16m$bmi5/100)
#Healthy days
brfss16m$healthdays<-recode(brfss16m$physhlth, recodes = "88=0; 77=NA; 99=NA")
#Healthy mental health days
brfss16m$healthmdays<-recode(brfss16m$menthlth, recodes = "88=0; 77=NA; 99=NA")
brfss16m$badhealth<-recode(brfss16m$genhlth, recodes="4:5=1; 1:3=0; else=NA")
#race/ethnicity
brfss16m$black<-recode(brfss16m$racegr3, recodes="2=1; 9=NA; else=0")
brfss16m$white<-recode(brfss16m$racegr3, recodes="1=1; 9=NA; else=0")
brfss16m$other<-recode(brfss16m$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss16m$hispanic<-recode(brfss16m$racegr3, recodes="5=1; 9=NA; else=0")
brfss16m$race_eth<-recode(brfss16m$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor.result = T)
brfss16m$race_eth<-relevel(brfss16m$race_eth, ref = "nhwhite")
#insurance
brfss16m$ins<-recode(brfss16m$hlthpln1, recodes ="7:9=NA; 1=1;2=0")
#income grouping
brfss16m$inc<-recode(brfss16m$incomg, recodes = "9= NA;1='1_lt15k'; 2='2_15-25k';3='3_25-35k';4='4_35-50k';5='5_50kplus'", as.factor.result = T)
brfss16m$inc<-as.ordered(brfss16m$inc)
#education level
brfss16m$educ<-recode(brfss16m$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor.result=T)
brfss16m$educ<-relevel(brfss16m$educ, ref='2hsgrad')
#employloyment
brfss16m$employ<-recode(brfss16m$employ1,
recodes="1:2='employloyed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
as.factor.result=T)
brfss16m$employ<-relevel(brfss16m$employ, ref='employloyed')
#marital status
brfss16m$marst<-recode(brfss16m$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
as.factor.result=T)
brfss16m$marst<-relevel(brfss16m$marst, ref='married')
#Age cut into intervals
brfss16m$agec<-cut(brfss16m$age80, breaks=c(0,24,39,59,79,99))
#BMI, in the brfss16ma the bmi variable has 2 implied decimal places,
#so we must divide by 100 to get real bmi's
brfss16m$bmi<-brfss16m$bmi5/100
#smoking currently
brfss16m$smoke<-recode(brfss16m$smoker3,
recodes="1:2=1; 3:4=0; else=NA")
#brfss16m$smoke<-relevel(brfss16m$smoke, ref = "NeverSmoked")
brfss16m$obese<-ifelse(is.na(brfss16m$bmi)==T, NA,
ifelse(brfss16m$bmi>30,1,0))
head(data.frame(name=table(brfss16m$mmsaname),id=unique(brfss16m$mmsa)))
## name.Var1
## 1 Albany-Schenectady-Troy, NY, Metropolitan Statistical Area
## 2 Albuquerque, NM, Metropolitan Statistical Area
## 3 Allentown-Bethlehem-Easton, PA-NJ, Metropolitan Statistical Area
## 4 Anchorage, AK, Metropolitan Statistical Area
## 5 Atlanta-Sandy Springs-Roswell, GA, Metropolitan Statistical Area
## 6 Augusta-Richmond County, GA-SC, Metropolitan Statistical Area
## name.Freq id
## 1 2901 10580
## 2 1300 10740
## 3 675 10900
## 4 1028 11260
## 5 2444 12060
## 6 849 12260
length(table(brfss16m$mmsa))
## [1] 143
#MSAs
library(acs)
#Get 2010 ACS median household incomes for tracts in Texas
msaacs<-geo.make(msa="*")
#ACS tables B17 is poverty, B19 is Gini, B25001 is housing vacancy, B25035 is median year built
acsecon<-acs.fetch(key=mykey, endyear=2010, span=5, geography=msaacs, variable = c("B19083_001","B17001_001","B17001_002" , "B25002_001","B25002_003", "B25035_001"))
colnames(acsecon@estimate)
## [1] "B19083_001" "B17001_001" "B17001_002" "B25002_001" "B25002_003"
## [6] "B25035_001"
msaecon<-data.frame(gini=acsecon@estimate[, "B19083_001"],
ppoverty=acsecon@estimate[, "B17001_002"]/acsecon@estimate[, "B17001_001"],
giniz=scale(acsecon@estimate[, "B19083_001"]),
pvacant=acsecon@estimate[,"B25002_003"]/acsecon@estimate[, "B25002_001"],
ppovertyz=scale(acsecon@estimate[, "B17001_002"]/acsecon@estimate[, "B17001_001"]),
pvacantz=scale(acsecon@estimate[,"B25002_003"]/acsecon@estimate[, "B25002_001"]),
medhouse=acsecon@estimate[, "B25035_001" ],
medhousez=scale(acsecon@estimate[, "B25035_001" ]))
msaecon$ids<-paste(acsecon@geography$metropolitanstatisticalareamicropolitanstatisticalarea)
head(msaecon)
## gini ppoverty giniz
## Adjuntas, PR Micro Area 0.525 0.5925656 2.718785
## Aguadilla-Isabela-San Sebastián, PR Metro Area 0.529 0.5577311 2.847562
## Coamo, PR Micro Area 0.532 0.5633984 2.944145
## Fajardo, PR Metro Area 0.483 0.4309070 1.366623
## Guayama, PR Metro Area 0.518 0.4980518 2.493425
## Jayuya, PR Micro Area 0.502 0.5446137 1.978315
## pvacant ppovertyz
## Adjuntas, PR Micro Area 0.1996029 6.271934
## Aguadilla-Isabela-San Sebastián, PR Metro Area 0.2005956 5.762039
## Coamo, PR Micro Area 0.1500353 5.844994
## Fajardo, PR Metro Area 0.3143172 3.905632
## Guayama, PR Metro Area 0.1951184 4.888474
## Jayuya, PR Micro Area 0.1562109 5.570031
## pvacantz medhouse
## Adjuntas, PR Micro Area 0.9053869 1978
## Aguadilla-Isabela-San Sebastián, PR Metro Area 0.9199516 1979
## Coamo, PR Micro Area 0.1781186 1979
## Fajardo, PR Metro Area 2.5885036 1977
## Guayama, PR Metro Area 0.8395882 1978
## Jayuya, PR Micro Area 0.2687281 1978
## medhousez ids
## Adjuntas, PR Micro Area 0.4923898 10260
## Aguadilla-Isabela-San Sebastián, PR Metro Area 0.5982027 10380
## Coamo, PR Micro Area 0.5982027 17620
## Fajardo, PR Metro Area 0.3865770 21940
## Guayama, PR Metro Area 0.4923898 25020
## Jayuya, PR Micro Area 0.4923898 27580
summary(msaecon)
## gini ppoverty giniz pvacant
## Min. :0.3510 Min. :0.02425 Min. :-2.88303 Min. :0.04246
## 1st Qu.:0.4190 1st Qu.:0.12308 1st Qu.:-0.69381 1st Qu.:0.09454
## Median :0.4390 Median :0.15172 Median :-0.04993 Median :0.12076
## Mean :0.4406 Mean :0.16409 Mean : 0.00000 Mean :0.13790
## 3rd Qu.:0.4590 3rd Qu.:0.18746 3rd Qu.: 0.59396 3rd Qu.:0.15964
## Max. :0.5730 Max. :0.59257 Max. : 4.26411 Max. :0.63877
## ppovertyz pvacantz medhouse medhousez
## Min. :-2.0468 Min. :-1.4002 Min. :1940 Min. :-3.5285
## 1st Qu.:-0.6002 1st Qu.:-0.6361 1st Qu.:1968 1st Qu.:-0.5657
## Median :-0.1810 Median :-0.2514 Median :1975 Median : 0.1750
## Mean : 0.0000 Mean : 0.0000 Mean :1973 Mean : 0.0000
## 3rd Qu.: 0.3421 3rd Qu.: 0.3190 3rd Qu.:1980 3rd Qu.: 0.7040
## Max. : 6.2719 Max. : 7.3489 Max. :1998 Max. : 2.6086
## ids
## Length:955
## Class :character
## Mode :character
##
##
##
joindata<-merge(brfss16m, msaecon, by.x="mmsa",by.y="ids", all.x=T)
joindata$healthmdaysz<-as.numeric(scale(joindata$healthmdays, center=T, scale=T))
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:acs':
##
## combine
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
joindata<-joindata%>%
select(healthmdaysz, marst, female, mmsa, agec, educ, race_eth, employ, healthmdays, badhealth,bmi,gini, ppoverty, ppovertyz,giniz,medhouse,medhousez, pvacant, pvacantz, mmsawt, mmsaname )%>%
filter(complete.cases(.))
head(joindata[, c("healthmdaysz", "marst", "female", "educ","ppoverty", "mmsa")])
## healthmdaysz marst female educ ppoverty mmsa
## 1 0.2095226 widowed 1 3somecol 0.1053253 10580
## 2 -0.4434798 married 1 2hsgrad 0.1053253 10580
## 3 -0.4434798 divorced 0 4colgrad 0.1053253 10580
## 4 -0.3128793 divorced 0 4colgrad 0.1053253 10580
## 5 -0.4434798 widowed 0 2hsgrad 0.1053253 10580
## 6 -0.4434798 widowed 1 3somecol 0.1053253 10580
meanhealthmdays<-mean(joindata$healthmdays, na.rm=T)
sdhealthmdays<-sd(joindata$healthmdays, na.rm=T)
fit.an<-lm(healthmdaysz~as.factor(mmsa), joindata)
anova(fit.an)
## Analysis of Variance Table
##
## Response: healthmdaysz
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(mmsa) 122 674 5.5250 5.4506 < 2.2e-16 ***
## Residuals 173935 176311 1.0137
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit.marst<-glm(marst~as.factor(mmsa),family=binomial, joindata)
anova(fit.marst, test="Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: marst
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 174057 240850
## as.factor(mmsa) 122 1566.3 173935 239284 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lme4)
library(lmerTest)
library(arm)
Basic hierarchical model for means of each MSA:
fit<-lmer(healthmdaysz~(1|mmsa), data=joindata)
arm::display(fit, detail=T)
## lme4::lmer(formula = healthmdaysz ~ (1 | mmsa), data = joindata)
## coef.est coef.se t value
## (Intercept) 0.01 0.01 2.29
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.06
## Residual 1.01
## ---
## number of obs: 174058, groups: mmsa, 123
## AIC = 496542, DIC = 496519.3
## deviance = 496527.6
joindata$predhealthmdaysz<-sdhealthmdays*(fitted(fit))+meanhealthmdays
head(ranef(fit))
## $mmsa
## (Intercept)
## 10580 1.221694e-02
## 10740 1.778700e-02
## 10900 -3.443115e-02
## 11260 -2.233782e-03
## 12060 -4.096259e-02
## 12260 -3.684353e-02
## 12420 -5.239642e-02
## 12580 -4.075729e-02
## 12940 4.983805e-02
## 13220 2.236786e-01
## 13620 1.031808e-02
## 13740 -4.017702e-02
## 13780 -1.121001e-02
## 13820 4.705872e-02
## 13900 -1.190693e-01
## 14260 -1.134951e-02
## 15380 2.907223e-02
## 15540 -3.716275e-02
## 16300 -4.429230e-03
## 16620 9.517001e-02
## 16700 5.785060e-02
## 16740 -2.786830e-02
## 16860 5.961286e-02
## 16980 -3.060025e-02
## 17140 5.552620e-02
## 17200 2.446242e-02
## 17460 -8.197855e-02
## 17780 -5.129326e-02
## 17820 -3.343795e-02
## 17900 4.285210e-02
## 18140 1.318036e-02
## 18580 -1.048966e-02
## 18880 1.087152e-02
## 19060 4.135640e-02
## 19380 -2.709001e-02
## 19660 1.586841e-02
## 19740 -5.830413e-02
## 19780 -5.850697e-02
## 20260 -3.862911e-02
## 21340 3.167846e-02
## 22020 -1.148549e-01
## 22220 -1.073678e-02
## 23060 -3.429489e-02
## 23540 6.240990e-02
## 24020 -3.013890e-02
## 24220 -3.716742e-02
## 24260 -1.033044e-01
## 24340 -2.968911e-02
## 24860 4.424383e-02
## 25180 5.717829e-02
## 25540 -4.903762e-02
## 25940 -4.556488e-02
## 26420 -4.717127e-02
## 26580 1.438714e-01
## 26900 -1.821463e-02
## 27140 -9.293085e-03
## 27260 4.070699e-02
## 28140 -5.790679e-02
## 28700 7.011104e-02
## 28940 9.266624e-02
## 29620 6.392352e-02
## 30700 -7.000426e-02
## 30780 4.912718e-02
## 30860 -8.548411e-03
## 31140 2.516124e-02
## 32820 4.625063e-02
## 33100 -3.596788e-02
## 33340 1.141608e-02
## 33460 -9.218951e-02
## 33500 -1.183226e-01
## 34820 2.035428e-02
## 34980 4.423369e-02
## 35380 3.741299e-02
## 35740 -6.297094e-02
## 35820 -2.090268e-02
## 35840 -5.994935e-02
## 36260 -1.134210e-02
## 36420 5.518900e-02
## 36540 -4.866474e-02
## 36740 9.125514e-03
## 37460 3.111239e-02
## 37860 5.633366e-02
## 38060 -4.014536e-02
## 38300 3.412177e-02
## 38860 -2.456656e-02
## 38900 3.111454e-02
## 38940 -5.391193e-02
## 39300 4.250922e-02
## 39340 4.031629e-05
## 39580 1.588562e-02
## 39660 -2.021522e-02
## 39900 9.775119e-02
## 40060 -2.121163e-02
## 40140 2.839155e-02
## 40340 -7.180005e-02
## 40380 4.089228e-02
## 40900 1.823694e-02
## 41060 -4.291375e-02
## 41180 3.014129e-02
## 41420 7.573486e-02
## 41540 -2.091047e-02
## 41620 1.157710e-02
## 41700 2.260075e-02
## 41940 -4.884423e-02
## 41980 6.271630e-02
## 42420 -6.412893e-03
## 43580 -7.404362e-02
## 43620 -1.171375e-01
## 43900 6.243606e-02
## 44060 -1.952962e-04
## 44140 7.643165e-02
## 45060 4.612151e-03
## 45220 -3.288907e-02
## 45300 3.039241e-02
## 45780 1.592946e-02
## 45820 -1.726271e-02
## 46140 -3.986402e-02
## 46220 1.131335e-01
## 46540 3.306045e-02
## 47260 -5.288605e-02
## 48620 -4.026408e-02
## 48660 2.744136e-02
## 49340 1.655470e-02
dim(ranef(fit)$mmsa)
## [1] 123 1
rate<- fixef(fit)+ranef(fit)$mmsa
est<-data.frame(rate =rate, id=rownames(ranef(fit)$mmsa))
citymeans<-aggregate(cbind(healthmdays, predhealthmdaysz)~mmsaname,joindata, mean)
head(citymeans, n=10)
## mmsaname
## 1 Albany-Schenectady-Troy, NY, Metropolitan Statistical Area
## 2 Albuquerque, NM, Metropolitan Statistical Area
## 3 Allentown-Bethlehem-Easton, PA-NJ, Metropolitan Statistical Area
## 4 Anchorage, AK, Metropolitan Statistical Area
## 5 Atlanta-Sandy Springs-Roswell, GA, Metropolitan Statistical Area
## 6 Augusta-Richmond County, GA-SC, Metropolitan Statistical Area
## 7 Austin-Round Rock, TX, Metropolitan Statistical Area
## 8 Baltimore-Columbia-Towson, MD, Metropolitan Statistical Area
## 9 Baton Rouge, LA, Metropolitan Statistical Area
## 10 Beckley, WV, Metropolitan Statistical Area
## healthmdays predhealthmdaysz
## 1 3.609295 3.650866
## 2 3.674419 3.693873
## 3 3.124789 3.290692
## 4 3.483871 3.539291
## 5 3.152363 3.240263
## 6 3.125984 3.272066
## 7 3.028694 3.151981
## 8 3.179884 3.241848
## 9 4.070780 3.941341
## 10 6.135628 5.283576
sqrt
## function (x) .Primitive("sqrt")
plot(predhealthmdaysz~healthmdays, citymeans)
fit2<-lmer(healthmdaysz~marst+female+educ+(1|mmsa), data=joindata, na.action=na.omit)
arm::display(fit2, detail=T)
## lme4::lmer(formula = healthmdaysz ~ marst + female + educ + (1 |
## mmsa), data = joindata, na.action = na.omit)
## coef.est coef.se t value
## (Intercept) -0.12 0.01 -15.33
## marstcohab 0.27 0.01 19.81
## marstdivorced 0.24 0.01 33.13
## marstnm 0.26 0.01 38.19
## marstseparated 0.53 0.02 30.80
## marstwidowed 0.01 0.01 0.84
## female 0.15 0.00 30.45
## educ0Prim 0.14 0.02 8.04
## educ1somehs 0.21 0.01 17.30
## educ3somecol -0.01 0.01 -0.80
## educ4colgrad -0.15 0.01 -24.87
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.05
## Residual 0.99
## ---
## number of obs: 174058, groups: mmsa, 123
## AIC = 490562, DIC = 490366
## deviance = 490451.1
anova(fit, fit2)
## refitting model(s) with ML (instead of REML)
## Data: joindata
## Models:
## object: healthmdaysz ~ (1 | mmsa)
## ..1: healthmdaysz ~ marst + female + educ + (1 | mmsa)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## object 3 496534 496564 -248264 496528
## ..1 13 490477 490608 -245226 490451 6076.5 10 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rand(fit2)
## Analysis of Random effects Table:
## Chi.sq Chi.DF p.value
## mmsa 180 1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit.mix<-lmer(healthmdaysz~marst+female+educ+(1|mmsa), data=joindata)
rand(fit.mix)
## Analysis of Random effects Table:
## Chi.sq Chi.DF p.value
## mmsa 180 1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
display(fit.mix, detail=T)
## lme4::lmer(formula = healthmdaysz ~ marst + female + educ + (1 |
## mmsa), data = joindata)
## coef.est coef.se t value
## (Intercept) -0.12 0.01 -15.33
## marstcohab 0.27 0.01 19.81
## marstdivorced 0.24 0.01 33.13
## marstnm 0.26 0.01 38.19
## marstseparated 0.53 0.02 30.80
## marstwidowed 0.01 0.01 0.84
## female 0.15 0.00 30.45
## educ0Prim 0.14 0.02 8.04
## educ1somehs 0.21 0.01 17.30
## educ3somecol -0.01 0.01 -0.80
## educ4colgrad -0.15 0.01 -24.87
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.05
## Residual 0.99
## ---
## number of obs: 174058, groups: mmsa, 123
## AIC = 490562, DIC = 490366
## deviance = 490451.1
fixcoef<-fit.an$coefficients[1]+fit.an$coefficients[-1]
fixcoef<-(fixcoef*sdhealthmdays)+meanhealthmdays
rancoefs1<-ranef(fit.mix)$mmsa+fixef(fit.mix)[1]
rancoefs1<-(rancoefs1*sdhealthmdays)+meanhealthmdays
summary(rancoefs1)
## (Intercept)
## Min. :1.864
## 1st Qu.:2.336
## Median :2.555
## Mean :2.557
## 3rd Qu.:2.762
## Max. :3.827
fit.mix3<-lmer(healthmdaysz~female+marst+educ+(marst|mmsa), joindata, REML=F)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
display(fit.mix3, detail=T)
## lme4::lmer(formula = healthmdaysz ~ female + marst + educ + (marst |
## mmsa), data = joindata, REML = F)
## coef.est coef.se t value
## (Intercept) -0.12 0.01 -20.52
## female 0.15 0.00 30.56
## marstcohab 0.29 0.02 15.86
## marstdivorced 0.25 0.01 23.36
## marstnm 0.27 0.01 27.22
## marstseparated 0.58 0.03 18.24
## marstwidowed 0.01 0.01 1.22
## educ0Prim 0.15 0.02 8.27
## educ1somehs 0.21 0.01 17.40
## educ3somecol -0.01 0.01 -0.95
## educ4colgrad -0.15 0.01 -25.17
##
## Error terms:
## Groups Name Std.Dev. Corr
## mmsa (Intercept) 0.00
## marstcohab 0.11 NaN
## marstdivorced 0.08 NaN 0.15
## marstnm 0.07 NaN 0.63 0.55
## marstseparated 0.27 NaN 0.69 0.49 0.54
## marstwidowed 0.07 NaN 0.58 0.34 0.55 0.50
## Residual 0.99
## ---
## number of obs: 174058, groups: mmsa, 123
## AIC = 490433, DIC = 490366.9
## deviance = 490366.9
anova(fit.mix, fit.mix3)
## refitting model(s) with ML (instead of REML)
## Data: joindata
## Models:
## object: healthmdaysz ~ marst + female + educ + (1 | mmsa)
## ..1: healthmdaysz ~ female + marst + educ + (marst | mmsa)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## object 13 490477 490608 -245226 490451
## ..1 33 490433 490765 -245183 490367 84.219 20 7.46e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1