library (tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(dplyr)
library(haven)
library(haven)
library(readr)
library(dplyr)
library(knitr)
library(tidyverse)
library(ggplot2)
library(ipumsr)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
##
## describe
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Load the mortality data set from a Stata (.dta) file.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists. Consider a project-relative path.
wad <- read_dta("C:/Users/chris/Downloads/PA_Mortality.dta")

# View() opens a GUI data viewer, so only call it in an interactive session.
# Batch runs (Rscript, automated rendering) skip it.
if (interactive()) {
  View(wad)
}

# Recode the two variables of interest into categories on a two-column copy:
#   avemort <= 8   -> "Low Mortality", otherwise "High Mortality"
#   gini    <= 0.4 -> "Equal",         otherwise "Unequal"
chrs <- subset(wad, select = c("avemort", "gini"))
chrs$avemort <- ifelse(wad$avemort <= 8, "Low Mortality", "High Mortality")
chrs$gini <- ifelse(wad$gini <= 0.4, "Equal", "Unequal")

# NOTE: this overwrites the numeric avemort column in wad with the category
# labels; wad$gini stays numeric so it can be analysed within each group.
wad$avemort <- chrs$avemort

# Split the rows by mortality category.
high <- subset(wad, avemort == "High Mortality")
low <- subset(wad, avemort == "Low Mortality")

# Group sizes (number of rows in each mortality category).
length(high$gini)
## [1] 52
length(low$gini)
## [1] 15
# 95% confidence interval for the mean Gini coefficient of the high-mortality
# group. The one-sample test against mu = 0 is presumably run only for the
# confidence interval it reports, not for the t statistic or p-value.
t.test(
  x = high$gini,
  alternative = "two.sided",
  mu = 0,
  conf.level = 0.95
)
##
## One Sample t-test
##
## data: high$gini
## t = 129.29, df = 51, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.4135352 0.4265801
## sample estimates:
## mean of x
## 0.4200577
# Same interval estimate for the low-mortality group.
t.test(
  x = low$gini,
  alternative = "two.sided",
  mu = 0,
  conf.level = 0.95
)
##
## One Sample t-test
##
## data: low$gini
## t = 69.765, df = 14, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.4088326 0.4347674
## sample estimates:
## mean of x
## 0.4218
# Draw a histogram of Gini coefficients for one mortality group and mark the
# mean (red) and median (blue) with vertical lines.
#
# gini:        numeric vector of Gini coefficients for the group.
# group_label: text appended to the plot title to identify the group.
# Returns NULL invisibly; called for its plotting side effect.
plot_gini_hist <- function(gini, group_label) {
  hist(
    gini,
    main = paste("County-Level Gini Coefficient -", group_label),
    xlab = "Gini coefficient"
  )
  # na.rm = TRUE so that a missing value cannot turn the reference lines into
  # NA, which abline() would silently not draw.
  abline(v = mean(gini, na.rm = TRUE), col = "red")
  abline(v = median(gini, na.rm = TRUE), col = "blue")
  # Without a legend the two reference lines cannot be told apart.
  legend(
    "topright",
    legend = c("Mean", "Median"),
    col = c("red", "blue"),
    lty = 1
  )
  invisible(NULL)
}

plot_gini_hist(high$gini, "High-Mortality Counties")
plot_gini_hist(low$gini, "Low-Mortality Counties")
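Yes, the two confidence intervals overlap: the interval for the high-mortality counties (0.4135 to 0.4266) lies entirely within the interval for the low-mortality counties (0.4088 to 0.4348).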
The 95% confidence intervals for the mean Gini coefficient are 0.4135352 to 0.4265801 for the counties with high mortality and 0.4088326 to 0.4347674 for the counties with low mortality. This means we are 95% confident that the true mean Gini coefficient of each group lies within its respective interval.
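Based on the mortality levels and the Gini coefficient, I conclude that the mean Gini coefficients of the two groups are very similar (0.4201 for high-mortality counties and 0.4218 for low-mortality counties). The differences between the two confidence intervals, in particular the wider interval for the low-mortality group, may be due to the difference in sample sizes (52 high-mortality counties versus 15 low-mortality counties).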