##load libraries
> library(haven)
> library(readr)
> library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
> library(knitr)
> library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.1 v stringr 1.4.0
## v tidyr 1.1.0 v forcats 0.5.0
## -- Conflicts ---------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##6.a
> ##a.
##Read the PA mortality data set from the Stata file.
> PA_Mortality <- read_dta("PA_Mortality.dta")
> View(PA_Mortality)
##Draw the boxplot of povrate once and keep the list of statistics it returns.
##boxplot() has no na.rm argument: missing values are always ignored when the
##boxes are formed, so nothing extra needs to be passed.
> bp <- boxplot(PA_Mortality$povrate)
> bp
## $stats
## [,1]
## [1,] 0.04873460
## [2,] 0.09666639
## [3,] 0.12454548
## [4,] 0.14198967
## [5,] 0.18808380
##
## $n
## [1] 67
##
## $conf
## [,1]
## [1,] 0.1157968
## [2,] 0.1332941
##
## $out
## [1] 0.2415866
##
## $group
## [1] 1
##
## $names
## [1] ""
##Summary statistics for povrate (presumably the county poverty rate).
##The median() and IQR() calls are kept commented out; only the mean, minimum
##and maximum are computed here.
> #median(PA_Mortality$povrate)
> mean(PA_Mortality$povrate)
## [1] 0.1210957
> #IQR(PA_Mortality$povrate)
> min(PA_Mortality$povrate)
## [1] 0.0487346
> max(PA_Mortality$povrate)
## [1] 0.2415866
> ##b. The mean (0.1210957) and the median (0.12454548) are very close, which is consistent with a roughly symmetric distribution; closeness of the mean and median alone does not establish normality. However, visual inspection of the boxplot shows some negative skew. It is worth noting that because the IQR is so small, the difference between the mean and median may still be large enough to indicate skewed data. Further testing, e.g. a Shapiro-Wilk test, can be applied to check normality.
##6.c
##Add two character indicator columns to the data: rows with avemort above 8
##are labelled "High Mortality" (otherwise "Low Mortality"), and rows with
##gini above 0.4 are labelled "Unequal" (otherwise "Equal").
> PA_Mort <- mutate(
+   PA_Mortality,
+   dum_avemort = ifelse(avemort > 8, "High Mortality", "Low Mortality"),
+   dum_gini = ifelse(gini > 0.4, "Unequal", "Equal")
+ )
> #print(PA_Mort)
> #52 counties have high mortality, and 56 counties have unequal Gini Coefficients.
##6.d
##Proportion, then count, of rows in each mortality category (dum_avemort).
> prop.table(table(PA_Mort$dum_avemort))
##
## High Mortality Low Mortality
## 0.7761194 0.2238806
> table(PA_Mort$dum_avemort)
##
## High Mortality Low Mortality
## 52 15
##Proportion, then count, of rows in each Gini category (dum_gini).
> prop.table(table(PA_Mort$dum_gini))
##
## Equal Unequal
## 0.1641791 0.8358209
> table(PA_Mort$dum_gini)
##
## Equal Unequal
## 11 56
##Keep only the rows labelled "Low Mortality" and print the resulting tibble.
> PA_Mort_l <- filter(PA_Mort, dum_avemort == "Low Mortality")
> print(PA_Mort_l)
## # A tibble: 15 x 15
## cofips name avemort gini depriv povrate pubassis fmlhhd nhispwht nhispblk
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 42009 Bedf~ 7.98 0.413 -1.86 0.142 0.0263 0.0486 0.977 0.00371
## 2 42027 Cent~ 7.50 0.472 1.71 0.185 0.0168 0.0371 0.889 0.0287
## 3 42029 Ches~ 7.68 0.448 -2.35 0.0620 0.0127 0.0489 0.844 0.0607
## 4 42041 Cumb~ 7.85 0.408 -1.51 0.0641 0.0161 0.0540 0.914 0.0282
## 5 42055 Fran~ 7.84 0.401 -1.41 0.0789 0.0203 0.0584 0.921 0.0285
## 6 42061 Hunt~ 7.99 0.410 -1.36 0.109 0.0211 0.0541 0.921 0.0537
## 7 42071 Lanc~ 7.75 0.410 -0.0573 0.0898 0.0224 0.0628 0.870 0.0291
## 8 42075 Leba~ 7.95 0.396 -0.979 0.0805 0.0196 0.0703 0.896 0.0149
## 9 42077 Lehi~ 7.97 0.439 0.341 0.110 0.0259 0.0797 0.762 0.0424
## 10 42091 Mont~ 7.73 0.454 -1.74 0.0549 0.0122 0.0574 0.822 0.0787
## 11 42095 Nort~ 7.54 0.415 -1.38 0.0781 0.0189 0.0738 0.837 0.0370
## 12 42103 Pike 7.55 0.416 -1.91 0.0907 0.0205 0.0799 0.846 0.0490
## 13 42109 Snyd~ 7.75 0.408 -1.16 0.115 0.0263 0.0542 0.964 0.00996
## 14 42119 Union 7.30 0.443 -0.467 0.127 0.0220 0.0531 0.859 0.0540
## 15 42133 York 7.91 0.394 -1.68 0.0838 0.0188 0.0682 0.885 0.0457
## # ... with 5 more variables: hispanic <dbl>, ski05pcm <dbl>, metro <dbl>,
## # dum_avemort <chr>, dum_gini <chr>
##Keep only the rows labelled "High Mortality"; printing is left disabled.
> PA_Mort_h <- filter(PA_Mort, dum_avemort == "High Mortality")
> #print(PA_Mort_h)
##One-sample t-tests on gini, first for the low-mortality subset and then for
##the high-mortality subset. No mu is supplied, so each test is against a true
##mean of 0 (see "alternative hypothesis" in the output); the t statistic and
##p-value therefore say little, and the useful part of each result is the
##95 percent confidence interval for the group's mean gini.
> t.test(PA_Mort_l$gini)
##
## One Sample t-test
##
## data: PA_Mort_l$gini
## t = 69.765, df = 14, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.4088326 0.4347674
## sample estimates:
## mean of x
## 0.4218
> t.test(PA_Mort_h$gini)
##
## One Sample t-test
##
## data: PA_Mort_h$gini
## t = 129.29, df = 51, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.4135352 0.4265801
## sample estimates:
## mean of x
## 0.4200577
> #i) Yes
> #ii) We are 95% confident that the mean Gini index lies between 0.409 and 0.435 for counties with low mortality, and between 0.414 and 0.427 for counties with high mortality.
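> #iii) The mean Gini index is almost the same in both groups (0.422 for low-mortality and 0.420 for high-mortality counties) and the two confidence intervals overlap, so this sample does not show that counties with greater wealth inequality, as measured by the Gini index, have higher mortality rates than counties with a lower Gini index.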