# Preview the first six rows of the infant mortality data.
head(infantmort, n = 6L)
## # A tibble: 6 x 9
## year maternalraceore… infantmortality… neonatalmortali… postneonatalmor…
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 2016 Asian/Pacific I… 2.9 2 0.9
## 2 2015 Asian/Pacific I… 2.6 1.6 1
## 3 2014 Asian/Pacific I… 2.6 1.8 0.8
## 4 2013 Asian/Pacific I… 3.1 2.5 0.6
## 5 2012 Asian/Pacific I… 3.3 2.1 1.2
## 6 2011 Asian/Pacific I… 2.9 1.8 1.2
## # … with 4 more variables: infantdeaths <dbl>, neonatalinfantdeaths <dbl>,
## # postneonatalinfantdeaths <dbl>, numberoflivebirths <dbl>
# Box plot of infant mortality rate for each maternal race/ethnicity group.
# The x-axis text and ticks are blanked; the groups are distinguished by fill.
p1 <- ggplot(
  infantmort,
  aes(
    x = maternalraceorethnicity,
    y = infantmortalityrate,
    fill = maternalraceorethnicity
  )
) +
  geom_boxplot() +
  ggtitle("Infant Mortality Rate by Ethnicity") +
  xlab("Maternal Race or Ethnicity") +
  ylab("Infant Mortality Rate") +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())
p1
# Infant deaths and live births for two maternal groups (first element:
# number of infant deaths, second element: number of live births).
Asian <- c(62, 21566)
White <- c(105, 40633)
imr <- data.frame(Asian, White)
rownames(imr) <- c('Number of Infant Deaths', 'Number of Live Births')
imr
## Asian White
## Number of Infant Deaths 62 105
## Number of Live Births 21566 40633
# Infant deaths are a subset of live births, so the two rows of `imr` are not
# mutually exclusive categories. A chi-squared test of independence needs a
# contingency table whose cells partition the observations, so test
# deaths vs. survivors (live births minus deaths) instead.
deaths <- unlist(imr['Number of Infant Deaths', ])
births <- unlist(imr['Number of Live Births', ])
imr_counts <- rbind(Deaths = deaths, Survivors = births - deaths)
# NOTE(review): the recorded output below was produced by chisq.test(imr) on
# the deaths/live-births table; re-run to refresh it.
chisq.test(imr_counts)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: imr
## X-squared = 0.3408, df = 1, p-value = 0.5594
# Year and infant mortality rate for the two groups compared below.
# Rows are selected by group label and columns by name rather than by
# position, so the subsets do not silently change if the data is reordered.
rate_cols <- c("year", "infantmortalityrate")
imr_b <- infantmort[
  infantmort$maternalraceorethnicity == "Black Non-H",
  rate_cols
]
imr_w <- infantmort[
  infantmort$maternalraceorethnicity == "White Non-H",
  rate_cols
]
# Show the two histograms side by side, then restore the previous graphics
# layout so that later plots are not squeezed into a 1 x 2 grid.
old_par <- par(mfrow = c(1, 2))
hist(imr_b$infantmortalityrate, cex.main = 1)
hist(imr_w$infantmortalityrate, cex.main = 1)
par(old_par)
# Yearly infant mortality rates for the two groups, ten values each.
bimr <- c(9.8, 10.2, 9.5, 8.6, 8.1, 8.5, 8.3, 7.5, 8, 8)
# The ninth value was previously typed as `2,7`, which R parses as the two
# elements 2 and 7 and yields an 11-element vector with a spurious 7.
wimr <- c(3.9, 3.3, 3.4, 2.8, 3.1, 2.7, 3, 2.6, 2.7, 2.6)
stopifnot(length(bimr) == length(wimr))
# Two-sided Wilcoxon rank sum test with a 95% confidence interval for the
# location shift between the groups.
# NOTE(review): the recorded output below (W = 110 = 10 x 11 comparisons) was
# produced with the 11-element vector; re-run to refresh it.
wilcox.test(bimr, wimr, conf.int = TRUE, conf.level = .95)
## Warning in wilcox.test.default(bimr, wimr, conf.int = TRUE, conf.level =
## 0.95): cannot compute exact p-value with ties
## Warning in wilcox.test.default(bimr, wimr, conf.int = TRUE, conf.level =
## 0.95): cannot compute exact confidence intervals with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: bimr and wimr
## W = 110, p-value = 0.0001229
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## 4.700032 6.399998
## sample estimates:
## difference in location
## 5.400028
# Scatter plot of the yearly infant mortality rates held in imr_b (red) and
# imr_w (blue) on shared axes.
plot(
  imr_b,
  ylim = c(0, 11), xlim = c(2007, 2016), col = "red", pch = 8,
  main = "Infant Mortality Rates in NYC from 2007-2016"
)
# Add the second series with points() instead of par(new = TRUE) plus a second
# plot() call, which would draw the axes, labels and title a second time.
points(imr_w, col = "blue", pch = 8)
legend(
  x = 2014, y = 11,
  legend = c("Black Non-H", "White Non-H"),
  col = c("red", "blue"), pch = c(8, 8)
)
# Dashed horizontal reference lines with their text labels placed just above.
abline(h = 4.1, lty = 2, col = "purple")
text(
  x = 2011, y = 4.5,
  labels = c("Average NYC Infant Mortality Rate (4.1)"), col = "purple"
)
abline(h = 5.7, lty = 2)
text(x = 2011, y = 6.1, labels = c("Average US Infant Mortality Rate (5.7)"))
# Linear model of infant mortality rate on year (as a factor), maternal race
# or ethnicity (as a factor) and number of live births.
# Variables are resolved through `data =` rather than `infantmort$...` inside
# the formula, so predict(newdata = ...) works and coefficient labels are
# shorter. The fitted values and estimates are unchanged; only the term labels
# differ from the recorded output below (e.g. `as.factor(year)2008`).
imr_fit <- lm(
  infantmortalityrate ~ as.factor(year) +
    as.factor(maternalraceorethnicity) + numberoflivebirths,
  data = infantmort
)
summary(imr_fit)
##
## Call:
## lm(formula = infantmort$infantmortalityrate ~ as.factor(infantmort$year) +
## as.factor(infantmort$maternalraceorethnicity) + infantmort$numberoflivebirths)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.67133 -0.25852 -0.08323 0.33196 1.73812
##
## Coefficients:
## Estimate
## (Intercept) 6.707e-01
## as.factor(infantmort$year)2008 2.390e-01
## as.factor(infantmort$year)2009 6.157e-03
## as.factor(infantmort$year)2010 -1.995e-01
## as.factor(infantmort$year)2011 -2.495e-01
## as.factor(infantmort$year)2012 -1.157e-01
## as.factor(infantmort$year)2013 -5.260e-01
## as.factor(infantmort$year)2014 -3.854e-01
## as.factor(infantmort$year)2015 -5.067e-01
## as.factor(infantmort$year)2016 -1.078e+00
## as.factor(infantmort$maternalraceorethnicity)Black Non-H 4.882e+00
## as.factor(infantmort$maternalraceorethnicity)Other Hispanic 1.842e-01
## as.factor(infantmort$maternalraceorethnicity)Puerto Rican 4.531e+00
## as.factor(infantmort$maternalraceorethnicity)White Non-H -2.581e+00
## infantmort$numberoflivebirths 1.324e-04
## Std. Error
## (Intercept) 1.435e+00
## as.factor(infantmort$year)2008 3.859e-01
## as.factor(infantmort$year)2009 3.880e-01
## as.factor(infantmort$year)2010 3.934e-01
## as.factor(infantmort$year)2011 3.946e-01
## as.factor(infantmort$year)2012 3.938e-01
## as.factor(infantmort$year)2013 4.036e-01
## as.factor(infantmort$year)2014 3.977e-01
## as.factor(infantmort$year)2015 4.004e-01
## as.factor(infantmort$year)2016 4.048e-01
## as.factor(infantmort$maternalraceorethnicity)Black Non-H 4.764e-01
## as.factor(infantmort$maternalraceorethnicity)Other Hispanic 6.653e-01
## as.factor(infantmort$maternalraceorethnicity)Puerto Rican 7.704e-01
## as.factor(infantmort$maternalraceorethnicity)White Non-H 1.339e+00
## infantmort$numberoflivebirths 6.674e-05
## t value
## (Intercept) 0.467
## as.factor(infantmort$year)2008 0.619
## as.factor(infantmort$year)2009 0.016
## as.factor(infantmort$year)2010 -0.507
## as.factor(infantmort$year)2011 -0.632
## as.factor(infantmort$year)2012 -0.294
## as.factor(infantmort$year)2013 -1.303
## as.factor(infantmort$year)2014 -0.969
## as.factor(infantmort$year)2015 -1.266
## as.factor(infantmort$year)2016 -2.664
## as.factor(infantmort$maternalraceorethnicity)Black Non-H 10.248
## as.factor(infantmort$maternalraceorethnicity)Other Hispanic 0.277
## as.factor(infantmort$maternalraceorethnicity)Puerto Rican 5.881
## as.factor(infantmort$maternalraceorethnicity)White Non-H -1.928
## infantmort$numberoflivebirths 1.984
## Pr(>|t|)
## (Intercept) 0.6431
## as.factor(infantmort$year)2008 0.5398
## as.factor(infantmort$year)2009 0.9874
## as.factor(infantmort$year)2010 0.6152
## as.factor(infantmort$year)2011 0.5313
## as.factor(infantmort$year)2012 0.7707
## as.factor(infantmort$year)2013 0.2010
## as.factor(infantmort$year)2014 0.3391
## as.factor(infantmort$year)2015 0.2140
## as.factor(infantmort$year)2016 0.0116 *
## as.factor(infantmort$maternalraceorethnicity)Black Non-H 4.44e-12 ***
## as.factor(infantmort$maternalraceorethnicity)Other Hispanic 0.7835
## as.factor(infantmort$maternalraceorethnicity)Puerto Rican 1.11e-06 ***
## as.factor(infantmort$maternalraceorethnicity)White Non-H 0.0620 .
## infantmort$numberoflivebirths 0.0552 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6051 on 35 degrees of freedom
## Multiple R-squared: 0.9491, Adjusted R-squared: 0.9287
## F-statistic: 46.59 on 14 and 35 DF, p-value: < 2.2e-16
# Draw the standard lm diagnostic plots for the fitted model
# (the default panel selection, stated explicitly).
plot(imr_fit, which = c(1L, 2L, 3L, 5L))
# One row per maternal group for 2016, keeping the group label, the infant
# mortality rate and the number of live births. Rows and columns are selected
# by value/name instead of hard-coded positions.
imr_2016 <- infantmort[
  infantmort$year == 2016,
  c("maternalraceorethnicity", "infantmortalityrate", "numberoflivebirths")
]
imr_2016
## # A tibble: 5 x 3
## maternalraceorethnicity infantmortalityrate numberoflivebirths
## <chr> <dbl> <dbl>
## 1 Asian/Pacific Islander 2.9 21566
## 2 Black Non-H 8 22465
## 3 Other Hispanic 3.8 26915
## 4 Puerto Rican 3.4 7159
## 5 White Non-H 2.6 40633
# Simple linear regression of the 2016 infant mortality rate on the number of
# live births (one observation per maternal group).
# Uses `data =` instead of `imr_2016$...` in the formula; estimates are
# unchanged, only the term label differs from the recorded output below
# (`numberoflivebirths`).
imr_fit_2016 <- lm(infantmortalityrate ~ numberoflivebirths, data = imr_2016)
summary(imr_fit_2016)
##
## Call:
## lm(formula = imr_2016$infantmortalityrate ~ imr_2016$numberoflivebirths)
##
## Residuals:
## 1 2 3 4 5
## -1.3045 3.8221 -0.2464 -1.2304 -1.0408
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.842e+00 2.729e+00 1.774 0.174
## imr_2016$numberoflivebirths -2.956e-05 1.047e-04 -0.282 0.796
##
## Residual standard error: 2.514 on 3 degrees of freedom
## Multiple R-squared: 0.02589, Adjusted R-squared: -0.2988
## F-statistic: 0.07973 on 1 and 3 DF, p-value: 0.796