library(ipumsr)
library(forcats)
library(gtsummary)
library(car)

## Loading required package: carData

library(sur)

## 
## Attaching package: 'sur'

## The following objects are masked from 'package:carData':
## 
##     Anscombe, States

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.0.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some()   masks car::some()

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(tibble)
library(dplyr)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

library(grid)
library(Matrix)
library(dplyr, warn.conflicts = FALSE)

Recoding the outcome variable

I recoded (ADDEPEV3) have you ever been told that you have depressive disorder into binary variable. I put “1” for for those who have been told they have depressive disorder and “0” for those who have not been told they have depressive disorder. I have excluded the values 7 and 9 with ‘else=NA’

#ever told you have depressive disorder
brfss20$depressivedisorder<-Recode(brfss20$addepev3, recodes="1=1; 2=0; else = NA")
table(brfss20$depressivedisorder)

## 
##      0      1 
## 157743  36552

error does not mean issue

Analysis

Descriptive Analyses

library(dplyr)
Data<-brfss20%>%
  
  select(depressivedisorder, mmsaname, bmi, agec,race_eth, marst, educ,white, black, hispanic, other, ins, mmsawt, ststr) %>%
  filter( complete.cases(.))

#First we tell R our survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data =Data )

First, we examine the % of US adults who have been told they have depressive disorder by education level, and do a survey-corrected chi-square test for independence.

Result<-svyby(formula = ~depressivedisorder,
           by = ~educ,
           design = des,
           FUN = svymean,
           na.rm=T)

svychisq(~depressivedisorder+educ,
         design = des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~depressivedisorder + educ, design = des)
## F = 40.295, ndf = 3.6232e+00, ddf = 6.0357e+05, p-value < 2.2e-16

plot of estimates with standard errors

Result%>%
  ggplot()+
  geom_point(aes(x=educ,y=depressivedisorder))+
  geom_errorbar(aes(x=educ, ymin = depressivedisorder-1.96*se, 
                    ymax= depressivedisorder+1.96*se),
                width=.25)+
   labs(title = "Percent % of US Adults who have been told they have depressive disorder", 
        caption = "Source: CDC BRFSS - SMART Data, 2020 \n Calculations by Bryan Solomon",
       x = "Education",
       y = "%  ever been told they have depressive disorder")+
  theme_minimal()

The graph shows that people who have a some high school are more likely to have been told they have depressive disorder. People who are fall under hsgrad, 3somecol, 4colgrad have smaller whiskers, meaning that the sample is larger. People who graduated from college have the smallest percentage of people who have been told they have depressive disorder.

Result2 <-svyby(formula = ~depressivedisorder,
           by = ~race_eth, 
           design = des, 
           FUN = svymean,
           na.rm=T)

svychisq(~depressivedisorder+race_eth,
         design = des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~depressivedisorder + race_eth, design = des)
## F = 52.731, ndf = 3.5211e+00, ddf = 5.8655e+05, p-value < 2.2e-16

Result2 %>%
  ggplot()+
  geom_point(aes(x=race_eth,y=depressivedisorder))+
  geom_errorbar(aes(x=race_eth, ymin = depressivedisorder-1.96*se, 
                    ymax= depressivedisorder+1.96*se),
                width=.25)+
   labs(title = "% depressive disorder by Race/Ethnicity", 
        caption = "Source: CDC BRFSS - SMART Data, 2020 \n Calculations by Bryan Solomon",
       x = "Race/Ethnicity",
       y = "%  with depressive disorder")+
  theme_minimal()

According to this graph NH Whites have the highest % of people who have been notified that they have depressive disorder. They also have the smallest error bars meaning that they have a large population in the sample. The NH black group is close to the hispanic group, both has a smaller % of people who have been notified that they have depressive disorder.

Result3<-svyby(formula = ~depressivedisorder,
              by = ~race_eth+educ,
              design = des,
              FUN = svymean,
              na.rm=T)

Result3%>%
  ggplot()+
  #geom_point(aes(x=educ, y = depressive Disorder, color=race_eth, group=race_eth), position="dodge")+ 
  geom_errorbar(aes(x=educ,y = depressivedisorder,
                    ymin = depressivedisorder-1.96*se, 
                   ymax= depressivedisorder+1.96*se,
                   color=race_eth,
                   group=race_eth),
                width=.25,
                position="dodge")+
  #facet_wrap(~ race_eth, nrow = 3)+
  labs(title = "Percent % of US Adults with depressive disorder by Race/Ethnicity and Education", 
        caption = "Source: CDC BRFSS - SMART Data, 2017 \n Calculations by Bryan Solomon",
       x = "Education",
       y = "depressive disorder")+
  theme_minimal()

According to this graph 4colgrad have the % of people who have depressive disorder in all the ethnicities. NH Whites also have the highest % of being told they have depressive disorder in all 5 educational columns. The error bar for 4colgrad have the smallest error bars meaning it is a larger sample.

fit.logit<-svyglm(depressivedisorder ~ race_eth + agec + educ, design = des, family =  binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

library(gtsummary)
fit.logit%>%
  tbl_regression(exponentiate=TRUE )

Characteristic	OR¹	95% CI¹	p-value
race_eth
hispanic	—	—
nh_black	1.16	1.02, 1.32	0.027
nh_multirace	2.22	1.85, 2.67	<0.001
nh_other	0.82	0.70, 0.96	0.017
nhwhite	1.74	1.57, 1.93	<0.001
agec
(0,24]	—	—
(24,39]	0.96	0.87, 1.06	0.5
(39,59]	0.76	0.69, 0.84	<0.001
(59,79]	0.63	0.57, 0.70	<0.001
(79,99]	0.31	0.25, 0.37	<0.001
educ
0Prim	—	—
1somehs	1.55	1.23, 1.95	<0.001
2hsgrad	0.94	0.77, 1.16	0.6
3somecol	1.09	0.88, 1.33	0.4
4colgrad	0.74	0.60, 0.90	0.003
¹ OR = Odds Ratio, CI = Confidence Interval

NH Whites are more than 1.7x more likely to be told they have depressive disorder than Hispanics. NH Blacks are more than 1.1x more likely than Hispanics to be told they have depressive disorder. People 24-39 years of age are 4% less likely to be told they have depressive disorder than those 0-24. People in the oldest age group 79-99 are 69% less likely to have been told they have a depressive disorder. People who have a primary education are used as the reference group. People who have somecol and some HS are more likely to have been told they have depressive disorder, versus someone with a college degree who is 26% less likely to have been told they have a depressive disorder.

library(sjPlot)
plot_model(fit.logit,
          axis.lim = c(.1, 5), 
           title = "Odds Ratios for Depression")

# axis limit is placed in regards to the max and min of x variable

knitr::kable(data.frame(OR = exp(coef(fit.logit)), ci = exp(confint(fit.logit))))

	OR	ci.2.5..	ci.97.5..
(Intercept)	0.2117287	0.1712197	0.2618219
race_ethnh_black	1.1595608	1.0167182	1.3224719
race_ethnh_multirace	2.2210342	1.8507746	2.6653666
race_ethnh_other	0.8203083	0.6974005	0.9648770
race_ethnhwhite	1.7398663	1.5702751	1.9277736
agec(24,39]	0.9644414	0.8746451	1.0634566
agec(39,59]	0.7630024	0.6929627	0.8401213
agec(59,79]	0.6335443	0.5740926	0.6991526
agec(79,99]	0.3050301	0.2519782	0.3692517
educ1somehs	1.5517052	1.2327506	1.9531843
educ2hsgrad	0.9422499	0.7677086	1.1564737
educ3somecol	1.0865640	0.8848239	1.3343007
educ4colgrad	0.7358000	0.6011987	0.9005370

make sure to change the name of each chunk so that the computer does not get confused. Result 1 > Result 2 > Result 3.

homework