##load libraries
library(haven)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v stringr 1.4.0
## v tidyr 1.1.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##load data
library(haven)
## Import the Stata dataset; the path is relative to the current working directory.
PA_Mortality <- read_dta("PA_Mortality.dta")
## View() opens a spreadsheet-style data viewer, which is only meaningful in an
## interactive session, so skip it when the script is run or knitted in batch.
if (interactive()) {
  View(PA_Mortality)
}
##a) Generate a boxplot of poverty rate at the county level (2 points). Based on the boxplot, what is the median poverty rate and the interquartile range (IQR) of the poverty rate? (2 points) What’s the minimum and maximum values for the poverty rate? (4 points) ##Note: the function to generate boxplot in R is boxplot(data$var, main=”title of boxplot”)
## Pull the county-level poverty rate out once and reuse it for the plot and
## for the descriptive statistics.
poverty_rate <- PA_Mortality$povrate
boxplot(poverty_rate, main = "Poverty Rate at County level")
##The median is approximately 0.125
##The IQR is 0.04532328
##Minimum value is approximately 0.05 and max value is approximately 0.24
IQR(poverty_rate)
## [1] 0.04532328
summary(poverty_rate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.04873 0.09667 0.12455 0.12110 0.14199 0.24159
##b) Is the distribution of poverty rate normally distributed? Why or why not? Describe how you reach to your conclusion. (4 points)
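## Yes, the poverty rate appears to be approximately normally distributed. The mean (0.12110) and the median (0.12455) are close to each other, which indicates a roughly symmetric distribution, and the box in the boxplot sits near the middle of the range. Note that mean and median being close only shows symmetry; a histogram or a normal Q-Q plot would be needed to confirm normality.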
##c) Please create two binary variables based on avemort and gini. For the former, please recode those less than or equal to 8 as “Low Mortality”, otherwise “High Mortality.” For the latter, those less than or equal to 0.4 should be coded as “Equal”, otherwise, “Unequal.” (8 points)
## Binary mortality indicator: an average mortality of 8 or below is coded
## "Low Mortality", anything above 8 is coded "High Mortality".
is_low_mortality <- PA_Mortality$avemort <= 8
avemort1 <- ifelse(is_low_mortality, "Low Mortality", "High Mortality")
view(avemort1)
print(avemort1)
## [1] "High Mortality" "High Mortality" "High Mortality" "High Mortality"
## [5] "Low Mortality" "High Mortality" "High Mortality" "High Mortality"
## [9] "High Mortality" "High Mortality" "High Mortality" "High Mortality"
## [13] "High Mortality" "Low Mortality" "Low Mortality" "High Mortality"
## [17] "High Mortality" "High Mortality" "High Mortality" "High Mortality"
## [21] "Low Mortality" "High Mortality" "High Mortality" "High Mortality"
## [25] "High Mortality" "High Mortality" "High Mortality" "Low Mortality"
## [29] "High Mortality" "High Mortality" "Low Mortality" "High Mortality"
## [33] "High Mortality" "High Mortality" "High Mortality" "Low Mortality"
## [37] "High Mortality" "Low Mortality" "Low Mortality" "High Mortality"
## [41] "High Mortality" "High Mortality" "High Mortality" "High Mortality"
## [45] "High Mortality" "Low Mortality" "High Mortality" "Low Mortality"
## [49] "High Mortality" "High Mortality" "High Mortality" "Low Mortality"
## [53] "High Mortality" "High Mortality" "Low Mortality" "High Mortality"
## [57] "High Mortality" "High Mortality" "High Mortality" "Low Mortality"
## [61] "High Mortality" "High Mortality" "High Mortality" "High Mortality"
## [65] "High Mortality" "High Mortality" "Low Mortality"
## Binary inequality indicator: a Gini coefficient of 0.4 or below is coded
## "Equal", anything above 0.4 is coded "Unequal".
is_equal_gini <- PA_Mortality$gini <= 0.4
gini1 <- ifelse(is_equal_gini, "Equal", "Unequal")
view(gini1)
print(gini1)
## [1] "Equal" "Unequal" "Unequal" "Unequal" "Unequal" "Unequal" "Unequal"
## [8] "Unequal" "Unequal" "Unequal" "Unequal" "Unequal" "Equal" "Unequal"
## [15] "Unequal" "Unequal" "Unequal" "Equal" "Unequal" "Unequal" "Unequal"
## [22] "Unequal" "Unequal" "Equal" "Unequal" "Unequal" "Unequal" "Unequal"
## [29] "Equal" "Unequal" "Unequal" "Unequal" "Unequal" "Equal" "Unequal"
## [36] "Unequal" "Unequal" "Equal" "Unequal" "Unequal" "Unequal" "Unequal"
## [43] "Unequal" "Unequal" "Equal" "Unequal" "Unequal" "Unequal" "Unequal"
## [50] "Equal" "Unequal" "Unequal" "Unequal" "Unequal" "Unequal" "Unequal"
## [57] "Unequal" "Unequal" "Unequal" "Unequal" "Unequal" "Unequal" "Unequal"
## [64] "Unequal" "Unequal" "Equal" "Equal"
##d) How many counties have high mortality? And how many counties have “unequal” gini coefficient? (8 points)
## 52 counties have high mortality.
## 56 counties have an "Unequal" Gini coefficient.
##e) Show the confidence intervals for gini coefficients when county mortality level is low and high, respectively.
## Confidence intervals for the mean Gini coefficient, computed separately for
## low- and high-mortality counties.
## NOTE: summary(x, "High Mortality") does NOT subset x -- the extra argument is
## ignored and the whole sample is summarised -- so the Gini values are split by
## mortality level explicitly. avemort1 is the "Low Mortality"/"High Mortality"
## indicator created in part c).
gini_by_mortality <- split(PA_Mortality$gini, avemort1)

## Group-wise descriptive statistics (five-number summary + mean, and sd).
lapply(gini_by_mortality, summary)
vapply(gini_by_mortality, sd, numeric(1))

## Confidence interval for a mean using the normal approximation:
##   mean(x) +/- z * sd(x) / sqrt(n)
## Arguments:
##   x          numeric vector of observations; missing values are dropped
##   conf_level confidence level of the interval (0.95 by default)
## Returns a named numeric vector c(lower = ..., upper = ...).
## NOTE(review): for small groups a t-based interval,
## qt(1 - (1 - conf_level) / 2, df = n - 1), would be somewhat wider -- confirm
## which one the course expects.
mean_ci <- function(x, conf_level = 0.95) {
  stopifnot(is.numeric(x))
  x <- as.numeric(x[!is.na(x)])
  n <- length(x)
  error <- qnorm(1 - (1 - conf_level) / 2) * sd(x) / sqrt(n)
  c(lower = mean(x) - error, upper = mean(x) + error)
}

##High Mortality
ci_high <- mean_ci(gini_by_mortality[["High Mortality"]])
print(ci_high)
##low mortality
ci_low <- mean_ci(gini_by_mortality[["Low Mortality"]])
print(ci_low)
###i) Do these confidence intervals overlap? (4 points)
## Two intervals overlap when each one starts before the other one ends.
intervals_overlap <- ci_low[["lower"]] <= ci_high[["upper"]] &&
  ci_high[["lower"]] <= ci_low[["upper"]]
print(intervals_overlap)
###ii) Interpret the confidence intervals from e). (8 points)
## Each interval is a 95% confidence interval for the MEAN GINI COEFFICIENT of
## one group of counties (it is not an interval for mortality):
## I am 95% confident that the mean Gini coefficient of high-mortality counties
## lies between the lower and upper limits printed for ci_high.
## I am 95% confident that the mean Gini coefficient of low-mortality counties
## lies between the lower and upper limits printed for ci_low.
## NOTE(review): the limits 0.4148 and 0.4260 reported previously for both groups
## came from the whole sample (n = 67); re-run this section and quote the
## group-specific limits instead.
###iii) What conclusion(s) can you draw with regard to the county’s mortality levels and gini coefficients? (4 points)
## NOTE(review): the earlier conclusion that high and low mortality are equally
## impacted by inequality rested on two identical whole-sample intervals and must
## be revisited with the group-specific intervals. If the intervals overlap,
## there is no clear evidence that mean income inequality differs between low-
## and high-mortality counties; if they do not overlap, the group with the higher
## interval has a higher mean Gini coefficient.