Behavioral Risk Factor Surveillance System (BRFSS) at CDC: the world’s largest survey. Monthly telephone interviews of adults 18 years of age or older living in households in all 50 states, the District of Columbia, Puerto Rico, Guam, and the Virgin Islands. This analysis uses pooled data for 2006-2015. Some variables (e.g. ‘life satisfaction’) are asked only in certain years. The pooled sample is approximately 3 million households. Measures on:
Health behaviors (e.g. smoking, binge drinking, exercise); health outcomes such as obesity, diabetes, and mental well-being; demographics (e.g. age, education); and additional ‘county level’ measures to capture neighborhood context (e.g. crime, fast food, religion/voting).
Current Sample
# Read the CSV into `cdc`.
cdc <- import("cdc_data.csv")
# Render the first rows through datatable(), then print the column structure.
cdc %>%
  head() %>%
  datatable()
str(cdc)
## 'data.frame': 103467 obs. of 13 variables:
## $ STATE : chr "Alabama" "Alabama" "Alabama" "Alabama" ...
## $ FIPS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Red_Blue16: chr "Red" "Red" "Red" "Red" ...
## $ REGION : chr "South" "South" "South" "South" ...
## $ iyear : int 2013 2014 2014 2014 2015 2014 2014 2014 2013 2015 ...
## $ age : chr "60 to 69" "80 or older" "60 to 69" "50 to 59" ...
## $ income : chr "50-75K" "35-50K" "25-35K" "15-20K" ...
## $ Education : chr "Some College" "Some College" "High School" "No High School" ...
## $ Gender : chr "Male" "Male" "Female" "Male" ...
## $ Race : chr "White" "Black" "White" "White" ...
## $ Obese : int 0 0 1 0 1 0 0 1 0 1 ...
## $ smoker : int 0 0 0 0 0 0 1 1 0 0 ...
## $ diabetic : int 0 0 0 0 1 0 0 0 0 0 ...
# Convert the character columns to factors with an explicit level order, so
# tables and model output follow the natural ordering rather than the
# alphabetical one.

# Education: lowest to highest attainment
cdc[["Education"]] <- factor(
  cdc[["Education"]],
  levels = c(
    "No High School",
    "High School",
    "Some College",
    "College+"
  )
)

# age: youngest to oldest bracket
cdc[["age"]] <- factor(
  cdc[["age"]],
  levels = c(
    "18 to 29",
    "30 to 39",
    "40 to 49",
    "50 to 59",
    "60 to 69",
    "70 to 79",
    "80 or older"
  )
)
Smoking Rates by Education and Gender
# Share of respondents with smoker == 1 in each Education x Gender cell.
# NOTE(review): the result column is called `diab` even though it averages
# `smoker`; the name is left unchanged so the rendered table header still
# matches. Confirm which outcome was intended.
by_education <- cdc %>%
  group_by(Education, Gender) %>%
  # .groups = "drop" returns an ungrouped table and silences the
  # "summarise() has grouped output by ..." message.
  summarise(diab = mean(smoker), .groups = "drop")
knitr::kable(by_education)
Education | Gender | diab |
---|---|---|
No High School | Female | 0.2676354 |
No High School | Male | 0.3310914 |
High School | Female | 0.2052427 |
High School | Male | 0.2371855 |
Some College | Female | 0.1726406 |
Some College | Male | 0.1904643 |
College+ | Female | 0.0762816 |
College+ | Male | 0.0780837 |
#############################################################
# Start with Simple Regression
#############################################################
# Linear probability model: OLS on the 0/1 `smoker` indicator.
# `Education * Gender * Race` expands to all main effects plus every two-way
# interaction and the three-way interaction, so the standalone variables are
# also included as independent variables.
fit1 <- lm(smoker ~ Education * Gender * Race, data = cdc)
# One model => one column label. The former second label
# ("Linear-Interation") had no matching column and was never printed.
stargazer(
  fit1,
  type = "text",
  dep.var.labels.include = TRUE,
  column.labels = "Linear"
)
##
## =======================================================================
## Dependent variable:
## ----------------------------
## smoker
## Linear
## -----------------------------------------------------------------------
## EducationHigh School -0.043**
## (0.018)
##
## EducationSome College -0.080***
## (0.018)
##
## EducationCollege+ -0.179***
## (0.018)
##
## GenderMale 0.138***
## (0.023)
##
## RaceOther -0.096***
## (0.019)
##
## RaceWhite 0.046***
## (0.017)
##
## EducationHigh School:GenderMale -0.074***
## (0.028)
##
## EducationSome College:GenderMale -0.084***
## (0.028)
##
## EducationCollege+:GenderMale -0.123***
## (0.028)
##
## EducationHigh School:RaceOther 0.069***
## (0.023)
##
## EducationSome College:RaceOther 0.096***
## (0.023)
##
## EducationCollege+:RaceOther 0.090***
## (0.023)
##
## EducationHigh School:RaceWhite -0.066***
## (0.020)
##
## EducationSome College:RaceWhite -0.065***
## (0.020)
##
## EducationCollege+:RaceWhite -0.060***
## (0.020)
##
## GenderMale:RaceOther -0.042
## (0.028)
##
## GenderMale:RaceWhite -0.105***
## (0.026)
##
## EducationHigh School:GenderMale:RaceOther 0.031
## (0.034)
##
## EducationSome College:GenderMale:RaceOther -0.008
## (0.035)
##
## EducationCollege+:GenderMale:RaceOther 0.055
## (0.035)
##
## EducationHigh School:GenderMale:RaceWhite 0.067**
## (0.030)
##
## EducationSome College:GenderMale:RaceWhite 0.068**
## (0.031)
##
## EducationCollege+:GenderMale:RaceWhite 0.089***
## (0.031)
##
## Constant 0.267***
## (0.015)
##
## -----------------------------------------------------------------------
## Observations 103,467
## R2 0.041
## Adjusted R2 0.040
## Residual Std. Error 0.359 (df = 103443)
## F Statistic 189.943*** (df = 23; 103443)
## =======================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Diabetes by Race, Age & Red/Blue State
# Linear probability model for the 0/1 `diabetic` indicator with a full
# Race x age x Red_Blue16 interaction (main effects included).
# NOTE: this overwrites the smoking model previously stored in `fit1`; the
# name is kept because the ggpredict() call further down reads `fit1`.
fit1 <- lm(diabetic ~ Race * age * Red_Blue16, data = cdc)
# One model => one column label. The former second label
# ("Linear-Interation") had no matching column and was never printed.
stargazer(
  fit1,
  type = "text",
  dep.var.labels.include = TRUE,
  column.labels = "Linear"
)
##
## ===================================================================
## Dependent variable:
## ----------------------------
## diabetic
## Linear
## -------------------------------------------------------------------
## RaceOther 0.003
## (0.022)
##
## RaceWhite 0.004
## (0.021)
##
## age30 to 39 0.055**
## (0.025)
##
## age40 to 49 0.065***
## (0.024)
##
## age50 to 59 0.193***
## (0.023)
##
## age60 to 69 0.265***
## (0.024)
##
## age70 to 79 0.369***
## (0.026)
##
## age80 or older 0.265***
## (0.033)
##
## Red_Blue16Red 0.002
## (0.024)
##
## RaceOther:age30 to 39 -0.040
## (0.029)
##
## RaceWhite:age30 to 39 -0.046*
## (0.027)
##
## RaceOther:age40 to 49 0.019
## (0.028)
##
## RaceWhite:age40 to 49 -0.027
## (0.026)
##
## RaceOther:age50 to 59 -0.035
## (0.027)
##
## RaceWhite:age50 to 59 -0.119***
## (0.025)
##
## RaceOther:age60 to 69 0.006
## (0.027)
##
## RaceWhite:age60 to 69 -0.127***
## (0.025)
##
## RaceOther:age70 to 79 -0.081***
## (0.031)
##
## RaceWhite:age70 to 79 -0.196***
## (0.027)
##
## RaceOther:age80 or older -0.049
## (0.042)
##
## RaceWhite:age80 or older -0.114***
## (0.035)
##
## RaceOther:Red_Blue16Red -0.007
## (0.028)
##
## RaceWhite:Red_Blue16Red -0.002
## (0.025)
##
## age30 to 39:Red_Blue16Red -0.020
## (0.031)
##
## age40 to 49:Red_Blue16Red 0.056*
## (0.030)
##
## age50 to 59:Red_Blue16Red 0.045
## (0.029)
##
## age60 to 69:Red_Blue16Red 0.087***
## (0.029)
##
## age70 to 79:Red_Blue16Red 0.020
## (0.033)
##
## age80 or older:Red_Blue16Red 0.041
## (0.042)
##
## RaceOther:age30 to 39:Red_Blue16Red 0.030
## (0.037)
##
## RaceWhite:age30 to 39:Red_Blue16Red 0.021
## (0.033)
##
## RaceOther:age40 to 49:Red_Blue16Red -0.037
## (0.036)
##
## RaceWhite:age40 to 49:Red_Blue16Red -0.039
## (0.032)
##
## RaceOther:age50 to 59:Red_Blue16Red 0.031
## (0.034)
##
## RaceWhite:age50 to 59:Red_Blue16Red -0.014
## (0.030)
##
## RaceOther:age60 to 69:Red_Blue16Red -0.064*
## (0.036)
##
## RaceWhite:age60 to 69:Red_Blue16Red -0.052*
## (0.031)
##
## RaceOther:age70 to 79:Red_Blue16Red 0.028
## (0.041)
##
## RaceWhite:age70 to 79:Red_Blue16Red 0.010
## (0.034)
##
## RaceOther:age80 or older:Red_Blue16Red -0.005
## (0.056)
##
## RaceWhite:age80 or older:Red_Blue16Red -0.025
## (0.044)
##
## Constant 0.011
## (0.020)
##
## -------------------------------------------------------------------
## Observations 103,467
## R2 0.062
## Adjusted R2 0.061
## Residual Std. Error 0.327 (df = 103425)
## F Statistic 166.333*** (df = 41; 103425)
## ===================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Model-based predictions from `fit1` across age, Race and Red_Blue16,
# stored in `dat1` and then drawn with its plot method.
dat1 <- ggpredict(
  model = fit1,
  terms = c("age", "Race", "Red_Blue16")
)
plot(dat1)