library(haven)
library(readr)
setwd('C:/Users/tim1203/Desktop/utsa/C. Sparks/Dem 7283/Data sets')
load("brfss_2017.rdata")
library(car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(questionr)
## Warning: package 'questionr' was built under R version 3.6.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#Recode Mental-Outcome Variable

brfss_17$mental<-Recode(brfss_17$menthlth, recodes="1:30=1; 88=0; else=NA")
table(brfss_17$mental)
## 
##      0      1 
## 152826  74517
table(brfss_17$menthlth)
## 
##      1      2      3      4      5      6      7      8      9     10 
##   7526  11574   6897   3484   8490    973   3285    672    112   6041 
##     11     12     13     14     15     16     17     18     19     20 
##     42    467     72   1244   5567    110     84    130      8   3405 
##     21     22     23     24     25     26     27     28     29     30 
##    217     65     39     58   1169     49     98    329    198  12112 
##     77     88     99 
##   2433 152826   1099

#a. A binary outcome variable is a dichotomous variable usually coded as 0, 1. The variable Menthlth asks how many days during the past 30 days has your mental health not been good? This variable was orginially coded as 1:30 for frequency of days, 88=none, 77=dk, 99=refused. This will be recoded as 1:30 as 1 (yes) and 88=0 (no).

#b Research Question Research Question: The outcome variable that I will be using is mental health. I examine the relationship between gender and level of education with mental health. Do males who are less educated have a greater chance of having mental health issues? #c.ย The predictor variables that will be used are gender, and level of education.

#Recode Education

brfss_17$educa2<-Recode(brfss_17$educa, recodes="1:2='OPrime'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad'; 9=NA", as.factor=T)
brfss_17$educa2<-relevel(brfss_17$educa2, ref='2hsgrad')
table(brfss_17$educa)
## 
##     1     2     3     4     5     6     9 
##   321  4812  9493 55951 62309 97008   979
table(brfss_17$educa2)
## 
##  2hsgrad  1somehs 3somecol 4colgrad   OPrime 
##    55951     9493    62309    97008     5133

Recode sex

library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.6.2
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
brfss_17$male<-as.factor(ifelse(brfss_17$sex==1, "Male", "Female"))
table(brfss_17$male)
## 
## Female   Male 
## 128792 102083
table(brfss_17$sex)
## 
##      1      2      9 
## 102083 128633    159
brfss_17$black<-Recode(brfss_17$racegr3, recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3, recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3, recodes="5=1; 9=NA; else=0")

#Descriptive Statistics

table(brfss_17$mental, brfss_17$educa2)
##    
##     2hsgrad 1somehs 3somecol 4colgrad OPrime
##   0   36869    5569    39715    66573   3416
##   1   18010    3604    21674    29516   1482
prop.table(table(brfss_17$mental, brfss_17$educa2), margin=2)
##    
##       2hsgrad   1somehs  3somecol  4colgrad    OPrime
##   0 0.6718235 0.6071078 0.6469400 0.6928264 0.6974275
##   1 0.3281765 0.3928922 0.3530600 0.3071736 0.3025725
chisq.test(table(brfss_17$mental, brfss_17$educa2))
## 
##  Pearson's Chi-squared test
## 
## data:  table(brfss_17$mental, brfss_17$educa2)
## X-squared = 553.54, df = 4, p-value < 2.2e-16

#There is no correlation since the pvalue is low.

options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data = brfss_17)

#Column percentages of level of mental health by level of education, adjusted for weight.

panera<-wtd.table(brfss_17$mental, brfss_17$educa2, weights = brfss_17$mmsawt)
prop.table(wtd.table(brfss_17$mental, brfss_17$educa2, weights=brfss_17$mmsawt), margin=2)
##     2hsgrad   1somehs  3somecol  4colgrad    OPrime
## 0 0.6490324 0.5992037 0.6141548 0.6674332 0.7044474
## 1 0.3509676 0.4007963 0.3858452 0.3325668 0.2955526

#There are differences between the weighted and non weighted percentages. #Standard Errors of Percentages

prop.table(table(brfss_17$mental, brfss_17$educa2), margin = 2)
##    
##       2hsgrad   1somehs  3somecol  4colgrad    OPrime
##   0 0.6718235 0.6071078 0.6469400 0.6928264 0.6974275
##   1 0.3281765 0.3928922 0.3530600 0.3071736 0.3025725
n<-table(is.na(brfss_17$mental)==F)
p<-prop.table(wtd.table(brfss_17$mental, brfss_17$educa2, weights = brfss_17$mmsawt), margin = 2)
se<-(p*(1-p))/n[2]
stargazer(data.frame(proportion=p, se=sqrt(se)), summary = F, type="html", digits=2)
proportion.Var1 proportion.Var2 proportion.Freq se.Var1 se.Var2 se.Freq
1 0 2hsgrad 0.65 0 2hsgrad 0.001
2 1 2hsgrad 0.35 1 2hsgrad 0.001
3 0 1somehs 0.60 0 1somehs 0.001
4 1 1somehs 0.40 1 1somehs 0.001
5 0 3somecol 0.61 0 3somecol 0.001
6 1 3somecol 0.39 1 3somecol 0.001
7 0 4colgrad 0.67 0 4colgrad 0.001
8 1 4colgrad 0.33 1 4colgrad 0.001
9 0 OPrime 0.70 0 OPrime 0.001
10 1 OPrime 0.30 1 OPrime 0.001

Proper survey design analysis

panera1<-svytable(~mental+educa2, design=des)
prop.table(svytable(~mental+educa2, design=des), margin = 2)
  educa2

mental 2hsgrad 1somehs 3somecol 4colgrad OPrime 0 0.6490324 0.5992037 0.6141548 0.6674332 0.7044474 1 0.3509676 0.4007963 0.3858452 0.3325668 0.2955526

stargazer(data.frame(prop.table(svytable(~mental+educa2, design=des), margin = 2)), summary=F, type = "html", digits = 3)
mental educa2 Freq
1 0 2hsgrad 0.649
2 1 2hsgrad 0.351
3 0 1somehs 0.599
4 1 1somehs 0.401
5 0 3somecol 0.614
6 1 3somecol 0.386
7 0 4colgrad 0.667
8 1 4colgrad 0.333
9 0 OPrime 0.704
10 1 OPrime 0.296
panera<-wtd.table(brfss_17$mental, brfss_17$educa2, weights = brfss_17$mmsawt)
prop.table(wtd.table(brfss_17$mental, brfss_17$educa2, weights=brfss_17$mmsawt), margin=2)
##     2hsgrad   1somehs  3somecol  4colgrad    OPrime
## 0 0.6490324 0.5992037 0.6141548 0.6674332 0.7044474
## 1 0.3509676 0.4007963 0.3858452 0.3325668 0.2955526
sv.table<-svyby(formula = ~mental, by = ~educa2, design = des, FUN = svymean, na.rm=T)
sv.table
       educa2    mental          se

2hsgrad 2hsgrad 0.3509676 0.004331290 1somehs 1somehs 0.4007963 0.009748267 3somecol 3somecol 0.3858452 0.004212850 4colgrad 4colgrad 0.3325668 0.003094811 OPrime OPrime 0.2955526 0.012057359

reg2<-lm(mental~educa2+male, data=brfss_17, weights = brfss_17$mmsawt)
summary(reg2)
## 
## Call:
## lm(formula = mental ~ educa2 + male, data = brfss_17, weights = brfss_17$mmsawt)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -84.056  -7.199  -3.658   7.056 128.782 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.401265   0.002223 180.506   <2e-16 ***
## educa21somehs   0.048927   0.004137  11.827   <2e-16 ***
## educa23somecol  0.029791   0.002673  11.147   <2e-16 ***
## educa24colgrad -0.022470   0.002665  -8.432   <2e-16 ***
## educa2OPrime   -0.056840   0.004984 -11.404   <2e-16 ***
## maleMale       -0.097904   0.002003 -48.881   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.45 on 226422 degrees of freedom
##   (4447 observations deleted due to missingness)
## Multiple R-squared:  0.01379,    Adjusted R-squared:  0.01377 
## F-statistic: 633.2 on 5 and 226422 DF,  p-value: < 2.2e-16
reg1<-lm(mental ~ educa2 + male, data = brfss_17)
summary(reg1)
## 
## Call:
## lm(formula = mental ~ educa2 + male, data = brfss_17)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4325 -0.3478 -0.2791  0.6322  0.7470 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.367754   0.002180 168.729  < 2e-16 ***
## educa21somehs   0.064766   0.005266  12.298  < 2e-16 ***
## educa23somecol  0.022040   0.002743   8.034 9.52e-16 ***
## educa24colgrad -0.019930   0.002498  -7.978 1.50e-15 ***
## educa2OPrime   -0.026083   0.006962  -3.746 0.000179 ***
## maleMale       -0.088659   0.001977 -44.854  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4669 on 226422 degrees of freedom
##   (4447 observations deleted due to missingness)
## Multiple R-squared:  0.01123,    Adjusted R-squared:  0.01121 
## F-statistic: 514.3 on 5 and 226422 DF,  p-value: < 2.2e-16

#Conclusion We can conclude after completing the analysis that the only statistical significance is found between level of education, gender with mental health. The direction of significance remains the same between regressions.