#Packages
library(haven)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(ggpubr)
## Loading required package: ggplot2
library(ggplot2)
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:ggpubr':
##
## mutate
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(knitr)
library(tidyverse)
## -- Attaching packages ----------------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.3 v stringr 1.4.0
## v tidyr 1.1.1 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts -------------------------------------------------- tidyverse_conflicts() --
## x plyr::arrange() masks dplyr::arrange()
## x purrr::compact() masks plyr::compact()
## x plyr::count() masks dplyr::count()
## x plyr::failwith() masks dplyr::failwith()
## x dplyr::filter() masks stats::filter()
## x plyr::id() masks dplyr::id()
## x dplyr::lag() masks stats::lag()
## x plyr::mutate() masks ggpubr::mutate(), dplyr::mutate()
## x plyr::rename() masks dplyr::rename()
## x plyr::summarise() masks dplyr::summarise()
## x plyr::summarize() masks dplyr::summarize()
# Data ----
# Read the Stata file with haven::read_dta(); haven is already attached in the
# package section at the top of the script, so it is not loaded a second time.
PA_Mortality <- read_dta("PA_Mortality.dta")
# View() opens a data viewer, which is only meaningful in an interactive
# session; skip it when the script is run non-interactively (Rscript, knitting).
if (interactive()) {
  View(PA_Mortality)
}
# Question 5 ----
# Describe the county-level poverty rate: box plot, median, interquartile
# range, extremes, and a density plot. The column is extracted once and reused.
poverty_rate <- PA_Mortality[["povrate"]]
boxplot(poverty_rate, main = "County Level Poverty Rate")
median(poverty_rate)
## [1] 0.1245455
IQR(poverty_rate)
## [1] 0.04532328
min(poverty_rate)
## [1] 0.0487346
max(poverty_rate)
## [1] 0.2415866
ggdensity(poverty_rate)
The density plot of the poverty rate values is roughly bell-shaped. Although the curve is not completely smooth, it resembles the normal distribution closely enough to treat the poverty rate as approximately normally distributed.
# Recode every county into two labels and keep only those two columns:
#   mort - "Low Mortality" when avemort is at most 8, otherwise "High Mortality"
#   ineq - "Equal" when gini is at most 0.4, otherwise "Unequal"
pamort <- dplyr::transmute(
  PA_Mortality,
  mort = ifelse(avemort > 8, "High Mortality", "Low Mortality"),
  ineq = ifelse(gini > 0.4, "Unequal", "Equal")
)
print(pamort)
## # A tibble: 67 x 2
## mort ineq
## <chr> <chr>
## 1 High Mortality Equal
## 2 High Mortality Unequal
## 3 High Mortality Unequal
## 4 High Mortality Unequal
## 5 Low Mortality Unequal
## 6 High Mortality Unequal
## 7 High Mortality Unequal
## 8 High Mortality Unequal
## 9 High Mortality Unequal
## 10 High Mortality Unequal
## # ... with 57 more rows
There are 52 counties with high mortality (average mortality above 8).
# Frequency table of the mortality labels. plyr::count() is called explicitly:
# a bare count() only resolves to it because plyr happens to be attached after
# dplyr, and dplyr::count() does not accept a plain vector.
plyr::count(pamort$mort)
## x freq
## 1 High Mortality 52
## 2 Low Mortality 15
There are 56 counties classified as unequal (Gini coefficient above 0.4).
# Frequency table of the inequality labels. plyr::count() is called explicitly:
# a bare count() only resolves to it because plyr happens to be attached after
# dplyr, and dplyr::count() does not accept a plain vector.
plyr::count(pamort$ineq)
## x freq
## 1 Equal 11
## 2 Unequal 56
# Low-mortality subset: counties whose average mortality is at most 8.
pamortl <- dplyr::filter(PA_Mortality, avemort <= 8)
print(pamortl)
## # A tibble: 15 x 13
## cofips name avemort gini depriv povrate pubassis fmlhhd nhispwht nhispblk
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 42009 Bedf~ 7.98 0.413 -1.86 0.142 0.0263 0.0486 0.977 0.00371
## 2 42027 Cent~ 7.50 0.472 1.71 0.185 0.0168 0.0371 0.889 0.0287
## 3 42029 Ches~ 7.68 0.448 -2.35 0.0620 0.0127 0.0489 0.844 0.0607
## 4 42041 Cumb~ 7.85 0.408 -1.51 0.0641 0.0161 0.0540 0.914 0.0282
## 5 42055 Fran~ 7.84 0.401 -1.41 0.0789 0.0203 0.0584 0.921 0.0285
## 6 42061 Hunt~ 7.99 0.410 -1.36 0.109 0.0211 0.0541 0.921 0.0537
## 7 42071 Lanc~ 7.75 0.410 -0.0573 0.0898 0.0224 0.0628 0.870 0.0291
## 8 42075 Leba~ 7.95 0.396 -0.979 0.0805 0.0196 0.0703 0.896 0.0149
## 9 42077 Lehi~ 7.97 0.439 0.341 0.110 0.0259 0.0797 0.762 0.0424
## 10 42091 Mont~ 7.73 0.454 -1.74 0.0549 0.0122 0.0574 0.822 0.0787
## 11 42095 Nort~ 7.54 0.415 -1.38 0.0781 0.0189 0.0738 0.837 0.0370
## 12 42103 Pike 7.55 0.416 -1.91 0.0907 0.0205 0.0799 0.846 0.0490
## 13 42109 Snyd~ 7.75 0.408 -1.16 0.115 0.0263 0.0542 0.964 0.00996
## 14 42119 Union 7.30 0.443 -0.467 0.127 0.0220 0.0531 0.859 0.0540
## 15 42133 York 7.91 0.394 -1.68 0.0838 0.0188 0.0682 0.885 0.0457
## # ... with 3 more variables: hispanic <dbl>, ski05pcm <dbl>, metro <dbl>
95% confidence interval for the mean Gini coefficient in low-mortality counties:
# Upper bound, mean and lower bound of the 95% confidence interval for gini
# among the low-mortality counties.
Rmisc::CI(pamortl[["gini"]], ci = 0.95)
## upper mean lower
## 0.4347674 0.4218000 0.4088326
# High-mortality subset: counties whose average mortality is above 8.
pamorth <- dplyr::filter(PA_Mortality, avemort > 8)
print(pamorth)
## # A tibble: 52 x 13
## cofips name avemort gini depriv povrate pubassis fmlhhd nhispwht nhispblk
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 42001 Adams 8.24 0.384 -1.94 0.0705 0.0151 0.0626 0.908 0.0160
## 2 42003 Alle~ 8.79 0.481 1.48 0.126 0.0306 0.0729 0.822 0.125
## 3 42005 Arms~ 8.76 0.403 -0.896 0.116 0.0298 0.0626 0.977 0.00887
## 4 42007 Beav~ 8.70 0.414 -1.15 0.106 0.0262 0.0673 0.913 0.0574
## 5 42011 Berks 8.20 0.414 -0.397 0.118 0.0292 0.0836 0.799 0.0386
## 6 42013 Blair 9.43 0.434 -0.390 0.132 0.0378 0.0747 0.964 0.0144
## 7 42015 Brad~ 8.22 0.420 -0.763 0.149 0.0305 0.0746 0.970 0.00523
## 8 42017 Bucks 8.36 0.424 -2.43 0.0487 0.0155 0.0522 0.886 0.0355
## 9 42019 Butl~ 8.22 0.422 -2.09 0.0835 0.0223 0.0606 0.965 0.00899
## 10 42021 Camb~ 8.68 0.427 -0.0585 0.142 0.0364 0.0695 0.941 0.0325
## # ... with 42 more rows, and 3 more variables: hispanic <dbl>, ski05pcm <dbl>,
## # metro <dbl>
95% confidence interval for the mean Gini coefficient in high-mortality counties:
# Upper bound, mean and lower bound of the 95% confidence interval for gini
# among the high-mortality counties.
Rmisc::CI(pamorth[["gini"]], ci = 0.95)
## upper mean lower
## 0.4265801 0.4200577 0.4135352
These confidence intervals overlap.
Each confidence interval gives a range that we can be 95% confident contains the true mean Gini coefficient of the corresponding population. For low-mortality counties the interval runs from 0.408 to 0.434; for high-mortality counties it runs from 0.413 to 0.426. Because the CIs for low- and high-mortality counties overlap, the mean Gini coefficients of the two populations may be similar; however, further statistical testing would be needed to confirm this.
Neither high- nor low-mortality counties had a mean, or even a lower-bound, Gini coefficient under the inequality threshold of 0.4. This would suggest that inequality is not associated with mortality levels within a community; however, there are important considerations for including inequality in subsequent models. First, there were more than three times as many high-mortality counties as low-mortality ones (52 versus 15), which explains the tighter CI for high mortality. Second, although neither group fell below the inequality threshold, the low-mortality group came closest, with a lower bound of 0.408. Lastly, the literature provides plenty of evidence associating health outcomes with differences in socioeconomic and racial inequality.
```