DEM 7273 Midterm

#Packages

library(haven)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readr)
library(ggpubr)

## Loading required package: ggplot2

library(ggplot2)
library(Rmisc)

## Loading required package: lattice

## Loading required package: plyr

## ------------------------------------------------------------------------------

## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)

## ------------------------------------------------------------------------------

## 
## Attaching package: 'plyr'

## The following object is masked from 'package:ggpubr':
## 
##     mutate

## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

library(knitr)
library(tidyverse)

## -- Attaching packages ----------------------------------------------- tidyverse 1.3.0 --

## v tibble  3.0.3     v stringr 1.4.0
## v tidyr   1.1.1     v forcats 0.5.0
## v purrr   0.3.4

## -- Conflicts -------------------------------------------------- tidyverse_conflicts() --
## x plyr::arrange()   masks dplyr::arrange()
## x purrr::compact()  masks plyr::compact()
## x plyr::count()     masks dplyr::count()
## x plyr::failwith()  masks dplyr::failwith()
## x dplyr::filter()   masks stats::filter()
## x plyr::id()        masks dplyr::id()
## x dplyr::lag()      masks stats::lag()
## x plyr::mutate()    masks ggpubr::mutate(), dplyr::mutate()
## x plyr::rename()    masks dplyr::rename()
## x plyr::summarise() masks dplyr::summarise()
## x plyr::summarize() masks dplyr::summarize()

#Data

# Read the Stata file into PA_Mortality; read_dta() comes from haven.
library(haven)
PA_Mortality <- read_dta("PA_Mortality.dta")
# View() opens an interactive data viewer, so only call it in an interactive
# session; this keeps knitting or running the script with Rscript from
# failing or stalling on the viewer.
if (interactive()) {
  View(PA_Mortality)
}

#Question 5

# Describe the distribution of the poverty rate column (povrate).
pov_rate <- PA_Mortality$povrate

# Box plot of the poverty rate.
boxplot(pov_rate, main = "County Level Poverty Rate")

# Median of the poverty rate.
median(pov_rate)

## [1] 0.1245455

# Interquartile range of the poverty rate.
IQR(pov_rate)

## [1] 0.04532328

# Smallest observed poverty rate.
min(pov_rate)

## [1] 0.0487346

# Largest observed poverty rate.
max(pov_rate)

## [1] 0.2415866

# Density curve, used to judge the shape of the distribution.
ggdensity(pov_rate)

The density plot of county poverty rates is roughly bell-shaped. Although the curve is not perfectly smooth, it resembles the normal distribution closely enough to treat the poverty rate as approximately normally distributed.

# Classify every row on two binary indicators and keep only those columns:
#   mort - "High Mortality" when avemort is above 8, otherwise "Low Mortality"
#   ineq - "Unequal" when gini is above 0.4, otherwise "Equal"
pamort <- transmute(
  PA_Mortality,
  mort = ifelse(avemort > 8, "High Mortality", "Low Mortality"),
  ineq = ifelse(gini > 0.4, "Unequal", "Equal")
)
print(pamort)

## # A tibble: 67 x 2
##    mort           ineq   
##    <chr>          <chr>  
##  1 High Mortality Equal  
##  2 High Mortality Unequal
##  3 High Mortality Unequal
##  4 High Mortality Unequal
##  5 Low Mortality  Unequal
##  6 High Mortality Unequal
##  7 High Mortality Unequal
##  8 High Mortality Unequal
##  9 High Mortality Unequal
## 10 High Mortality Unequal
## # ... with 57 more rows

There are 52 counties with high mortality (avemort above 8).

# Frequency table of the mortality categories. plyr::count() is named
# explicitly because tabulating a bare vector only works with plyr's count();
# the unqualified call resolves to it only while plyr is attached after dplyr
# and would fail with dplyr::count() if the load order were changed.
plyr::count(pamort$mort)

##                x freq
## 1 High Mortality   52
## 2  Low Mortality   15

There are 56 counties classified as unequal (Gini coefficient above 0.4).

# Frequency table of the inequality categories. plyr::count() is named
# explicitly because tabulating a bare vector only works with plyr's count();
# the unqualified call resolves to it only while plyr is attached after dplyr
# and would fail with dplyr::count() if the load order were changed.
plyr::count(pamort$ineq)

##         x freq
## 1   Equal   11
## 2 Unequal   56

# Rows classed as low mortality: avemort of 8 or less.
pamortl <- dplyr::filter(PA_Mortality, avemort <= 8)
print(pamortl)

## # A tibble: 15 x 13
##    cofips name  avemort  gini  depriv povrate pubassis fmlhhd nhispwht nhispblk
##     <dbl> <chr>   <dbl> <dbl>   <dbl>   <dbl>    <dbl>  <dbl>    <dbl>    <dbl>
##  1  42009 Bedf~    7.98 0.413 -1.86    0.142    0.0263 0.0486    0.977  0.00371
##  2  42027 Cent~    7.50 0.472  1.71    0.185    0.0168 0.0371    0.889  0.0287 
##  3  42029 Ches~    7.68 0.448 -2.35    0.0620   0.0127 0.0489    0.844  0.0607 
##  4  42041 Cumb~    7.85 0.408 -1.51    0.0641   0.0161 0.0540    0.914  0.0282 
##  5  42055 Fran~    7.84 0.401 -1.41    0.0789   0.0203 0.0584    0.921  0.0285 
##  6  42061 Hunt~    7.99 0.410 -1.36    0.109    0.0211 0.0541    0.921  0.0537 
##  7  42071 Lanc~    7.75 0.410 -0.0573  0.0898   0.0224 0.0628    0.870  0.0291 
##  8  42075 Leba~    7.95 0.396 -0.979   0.0805   0.0196 0.0703    0.896  0.0149 
##  9  42077 Lehi~    7.97 0.439  0.341   0.110    0.0259 0.0797    0.762  0.0424 
## 10  42091 Mont~    7.73 0.454 -1.74    0.0549   0.0122 0.0574    0.822  0.0787 
## 11  42095 Nort~    7.54 0.415 -1.38    0.0781   0.0189 0.0738    0.837  0.0370 
## 12  42103 Pike     7.55 0.416 -1.91    0.0907   0.0205 0.0799    0.846  0.0490 
## 13  42109 Snyd~    7.75 0.408 -1.16    0.115    0.0263 0.0542    0.964  0.00996
## 14  42119 Union    7.30 0.443 -0.467   0.127    0.0220 0.0531    0.859  0.0540 
## 15  42133 York     7.91 0.394 -1.68    0.0838   0.0188 0.0682    0.885  0.0457 
## # ... with 3 more variables: hispanic <dbl>, ski05pcm <dbl>, metro <dbl>

CI for low mortality counties

# 95% confidence interval (upper, mean, lower) for the mean of gini among
# the low-mortality rows.
Rmisc::CI(x = pamortl$gini, ci = 0.95)

##     upper      mean     lower 
## 0.4347674 0.4218000 0.4088326

# Rows classed as high mortality: avemort above 8.
pamorth <- dplyr::filter(PA_Mortality, avemort > 8)
print(pamorth)

## # A tibble: 52 x 13
##    cofips name  avemort  gini  depriv povrate pubassis fmlhhd nhispwht nhispblk
##     <dbl> <chr>   <dbl> <dbl>   <dbl>   <dbl>    <dbl>  <dbl>    <dbl>    <dbl>
##  1  42001 Adams    8.24 0.384 -1.94    0.0705   0.0151 0.0626    0.908  0.0160 
##  2  42003 Alle~    8.79 0.481  1.48    0.126    0.0306 0.0729    0.822  0.125  
##  3  42005 Arms~    8.76 0.403 -0.896   0.116    0.0298 0.0626    0.977  0.00887
##  4  42007 Beav~    8.70 0.414 -1.15    0.106    0.0262 0.0673    0.913  0.0574 
##  5  42011 Berks    8.20 0.414 -0.397   0.118    0.0292 0.0836    0.799  0.0386 
##  6  42013 Blair    9.43 0.434 -0.390   0.132    0.0378 0.0747    0.964  0.0144 
##  7  42015 Brad~    8.22 0.420 -0.763   0.149    0.0305 0.0746    0.970  0.00523
##  8  42017 Bucks    8.36 0.424 -2.43    0.0487   0.0155 0.0522    0.886  0.0355 
##  9  42019 Butl~    8.22 0.422 -2.09    0.0835   0.0223 0.0606    0.965  0.00899
## 10  42021 Camb~    8.68 0.427 -0.0585  0.142    0.0364 0.0695    0.941  0.0325 
## # ... with 42 more rows, and 3 more variables: hispanic <dbl>, ski05pcm <dbl>,
## #   metro <dbl>

CI for high mortality counties

# 95% confidence interval (upper, mean, lower) for the mean of gini among
# the high-mortality rows.
Rmisc::CI(x = pamorth$gini, ci = 0.95)

##     upper      mean     lower 
## 0.4265801 0.4200577 0.4135352

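These confidence intervals overlap: the interval for high-mortality counties lies entirely within the interval for low-mortality counties.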

Each confidence interval gives a range within which we can be 95% confident that the true mean Gini coefficient of the group lies. For low-mortality counties this range is 0.409 to 0.435; for high-mortality counties it is 0.414 to 0.427. Because the CIs for low- and high-mortality counties overlap, the mean Gini coefficients of the two groups may be similar; however, further statistical testing would be needed to confirm this.

Neither high- nor low-mortality counties had a mean, or even a lower confidence bound, at or below the inequality threshold of 0.4. On its own, this would suggest that inequality is not associated with mortality levels within a community; however, there are important reasons to include inequality in subsequent models. First, there were roughly 3.5 times as many high-mortality counties (52) as low-mortality counties (15), which helps explain the tighter CI for the high-mortality group. Second, although neither group fell below the inequality threshold, the low-mortality group came closest, with a lower bound of 0.409. Lastly, the literature provides ample evidence associating health outcomes with socioeconomic and racial inequality.

```

DEM 7273 Midterm

Daniel Mamani

10/20/2020