library(haven)
library(readr)
setwd('C:/Users/tim1203/Desktop/utsa/C. Sparks/Dem 7283/Data sets')
load("brfss_2017.rdata")
library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(questionr)
## Warning: package 'questionr' was built under R version 3.6.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Recode Mental-Outcome Variable
brfss_17$mental<-Recode(brfss_17$menthlth, recodes="1:30=1; 88=0; else=NA")
table(brfss_17$mental)
##
## 0 1
## 152826 74517
table(brfss_17$menthlth)
##
## 1 2 3 4 5 6 7 8 9 10
## 7526 11574 6897 3484 8490 973 3285 672 112 6041
## 11 12 13 14 15 16 17 18 19 20
## 42 467 72 1244 5567 110 84 130 8 3405
## 21 22 23 24 25 26 27 28 29 30
## 217 65 39 58 1169 49 98 329 198 12112
## 77 88 99
## 2433 152826 1099
#a. A binary outcome variable is a dichotomous variable usually coded as 0, 1. The variable Menthlth asks how many days during the past 30 days has your mental health not been good? This variable was orginially coded as 1:30 for frequency of days, 88=none, 77=dk, 99=refused. This will be recoded as 1:30 as 1 (yes) and 88=0 (no).
#b Research Question Research Question: The outcome variable that I will be using is mental health. I examine the relationship between gender and level of education with mental health. Do males who are less educated have a greater chance of having mental health issues? #c.ย The predictor variables that will be used are gender, and level of education.
#Recode Education
brfss_17$educa2<-Recode(brfss_17$educa, recodes="1:2='OPrime'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad'; 9=NA", as.factor=T)
brfss_17$educa2<-relevel(brfss_17$educa2, ref='2hsgrad')
table(brfss_17$educa)
##
## 1 2 3 4 5 6 9
## 321 4812 9493 55951 62309 97008 979
table(brfss_17$educa2)
##
## 2hsgrad 1somehs 3somecol 4colgrad OPrime
## 55951 9493 62309 97008 5133
Recode sex
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.6.2
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
brfss_17$male<-as.factor(ifelse(brfss_17$sex==1, "Male", "Female"))
table(brfss_17$male)
##
## Female Male
## 128792 102083
table(brfss_17$sex)
##
## 1 2 9
## 102083 128633 159
brfss_17$black<-Recode(brfss_17$racegr3, recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3, recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3, recodes="5=1; 9=NA; else=0")
#Descriptive Statistics
table(brfss_17$mental, brfss_17$educa2)
##
## 2hsgrad 1somehs 3somecol 4colgrad OPrime
## 0 36869 5569 39715 66573 3416
## 1 18010 3604 21674 29516 1482
prop.table(table(brfss_17$mental, brfss_17$educa2), margin=2)
##
## 2hsgrad 1somehs 3somecol 4colgrad OPrime
## 0 0.6718235 0.6071078 0.6469400 0.6928264 0.6974275
## 1 0.3281765 0.3928922 0.3530600 0.3071736 0.3025725
chisq.test(table(brfss_17$mental, brfss_17$educa2))
##
## Pearson's Chi-squared test
##
## data: table(brfss_17$mental, brfss_17$educa2)
## X-squared = 553.54, df = 4, p-value < 2.2e-16
#There is no correlation since the pvalue is low.
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data = brfss_17)
#Column percentages of level of mental health by level of education, adjusted for weight.
panera<-wtd.table(brfss_17$mental, brfss_17$educa2, weights = brfss_17$mmsawt)
prop.table(wtd.table(brfss_17$mental, brfss_17$educa2, weights=brfss_17$mmsawt), margin=2)
## 2hsgrad 1somehs 3somecol 4colgrad OPrime
## 0 0.6490324 0.5992037 0.6141548 0.6674332 0.7044474
## 1 0.3509676 0.4007963 0.3858452 0.3325668 0.2955526
#There are differences between the weighted and non weighted percentages. #Standard Errors of Percentages
prop.table(table(brfss_17$mental, brfss_17$educa2), margin = 2)
##
## 2hsgrad 1somehs 3somecol 4colgrad OPrime
## 0 0.6718235 0.6071078 0.6469400 0.6928264 0.6974275
## 1 0.3281765 0.3928922 0.3530600 0.3071736 0.3025725
n<-table(is.na(brfss_17$mental)==F)
p<-prop.table(wtd.table(brfss_17$mental, brfss_17$educa2, weights = brfss_17$mmsawt), margin = 2)
se<-(p*(1-p))/n[2]
stargazer(data.frame(proportion=p, se=sqrt(se)), summary = F, type="html", digits=2)
| proportion.Var1 | proportion.Var2 | proportion.Freq | se.Var1 | se.Var2 | se.Freq | |
| 1 | 0 | 2hsgrad | 0.65 | 0 | 2hsgrad | 0.001 |
| 2 | 1 | 2hsgrad | 0.35 | 1 | 2hsgrad | 0.001 |
| 3 | 0 | 1somehs | 0.60 | 0 | 1somehs | 0.001 |
| 4 | 1 | 1somehs | 0.40 | 1 | 1somehs | 0.001 |
| 5 | 0 | 3somecol | 0.61 | 0 | 3somecol | 0.001 |
| 6 | 1 | 3somecol | 0.39 | 1 | 3somecol | 0.001 |
| 7 | 0 | 4colgrad | 0.67 | 0 | 4colgrad | 0.001 |
| 8 | 1 | 4colgrad | 0.33 | 1 | 4colgrad | 0.001 |
| 9 | 0 | OPrime | 0.70 | 0 | OPrime | 0.001 |
| 10 | 1 | OPrime | 0.30 | 1 | OPrime | 0.001 |
panera1<-svytable(~mental+educa2, design=des)
prop.table(svytable(~mental+educa2, design=des), margin = 2)
educa2
mental 2hsgrad 1somehs 3somecol 4colgrad OPrime 0 0.6490324 0.5992037 0.6141548 0.6674332 0.7044474 1 0.3509676 0.4007963 0.3858452 0.3325668 0.2955526
stargazer(data.frame(prop.table(svytable(~mental+educa2, design=des), margin = 2)), summary=F, type = "html", digits = 3)
| mental | educa2 | Freq | |
| 1 | 0 | 2hsgrad | 0.649 |
| 2 | 1 | 2hsgrad | 0.351 |
| 3 | 0 | 1somehs | 0.599 |
| 4 | 1 | 1somehs | 0.401 |
| 5 | 0 | 3somecol | 0.614 |
| 6 | 1 | 3somecol | 0.386 |
| 7 | 0 | 4colgrad | 0.667 |
| 8 | 1 | 4colgrad | 0.333 |
| 9 | 0 | OPrime | 0.704 |
| 10 | 1 | OPrime | 0.296 |
panera<-wtd.table(brfss_17$mental, brfss_17$educa2, weights = brfss_17$mmsawt)
prop.table(wtd.table(brfss_17$mental, brfss_17$educa2, weights=brfss_17$mmsawt), margin=2)
## 2hsgrad 1somehs 3somecol 4colgrad OPrime
## 0 0.6490324 0.5992037 0.6141548 0.6674332 0.7044474
## 1 0.3509676 0.4007963 0.3858452 0.3325668 0.2955526
sv.table<-svyby(formula = ~mental, by = ~educa2, design = des, FUN = svymean, na.rm=T)
sv.table
educa2 mental se
2hsgrad 2hsgrad 0.3509676 0.004331290 1somehs 1somehs 0.4007963 0.009748267 3somecol 3somecol 0.3858452 0.004212850 4colgrad 4colgrad 0.3325668 0.003094811 OPrime OPrime 0.2955526 0.012057359
reg2<-lm(mental~educa2+male, data=brfss_17, weights = brfss_17$mmsawt)
summary(reg2)
##
## Call:
## lm(formula = mental ~ educa2 + male, data = brfss_17, weights = brfss_17$mmsawt)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -84.056 -7.199 -3.658 7.056 128.782
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.401265 0.002223 180.506 <2e-16 ***
## educa21somehs 0.048927 0.004137 11.827 <2e-16 ***
## educa23somecol 0.029791 0.002673 11.147 <2e-16 ***
## educa24colgrad -0.022470 0.002665 -8.432 <2e-16 ***
## educa2OPrime -0.056840 0.004984 -11.404 <2e-16 ***
## maleMale -0.097904 0.002003 -48.881 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.45 on 226422 degrees of freedom
## (4447 observations deleted due to missingness)
## Multiple R-squared: 0.01379, Adjusted R-squared: 0.01377
## F-statistic: 633.2 on 5 and 226422 DF, p-value: < 2.2e-16
reg1<-lm(mental ~ educa2 + male, data = brfss_17)
summary(reg1)
##
## Call:
## lm(formula = mental ~ educa2 + male, data = brfss_17)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4325 -0.3478 -0.2791 0.6322 0.7470
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.367754 0.002180 168.729 < 2e-16 ***
## educa21somehs 0.064766 0.005266 12.298 < 2e-16 ***
## educa23somecol 0.022040 0.002743 8.034 9.52e-16 ***
## educa24colgrad -0.019930 0.002498 -7.978 1.50e-15 ***
## educa2OPrime -0.026083 0.006962 -3.746 0.000179 ***
## maleMale -0.088659 0.001977 -44.854 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4669 on 226422 degrees of freedom
## (4447 observations deleted due to missingness)
## Multiple R-squared: 0.01123, Adjusted R-squared: 0.01121
## F-statistic: 514.3 on 5 and 226422 DF, p-value: < 2.2e-16
#Conclusion We can conclude after completing the analysis that the only statistical significance is found between level of education, gender with mental health. The direction of significance remains the same between regressions.