Data Source: http://www.childmortality.org/

Data Set: “Estimates for under-five, infant and neonatal mortality”

The dataset contains six value variables of interest: under-five (0-4 years) mortality, infant (0-1 years) mortality, neonatal (0-1 month) mortality, as well as the number of under-five, infant, and neonatal deaths.

  1. Installing necessary packages
# install.packages("tidyr")
# install.packages("dplyr")
# install.packages("ggplot2")
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(stringr)
  1. Reading csv file
# Download the child-mortality estimates CSV and load it as a data frame.
death_rate <- read.csv(
  file = "https://raw.githubusercontent.com/olgashiligin/project2_607/master/Child_Death_Rate.csv"
)
  1. Performing data transformation:
# Reshape the wide table (one column per measure-and-year, e.g. `U5MR.1950`)
# into long form with one row per country, measure and year, keeping only the
# "Median" estimates from 1998 onwards.
median_bound <- death_rate %>%
  gather(year_type, value, U5MR.1950:Neonatal.Deaths.2015) %>%
  mutate(
    # Everything after the last dot in the column name is the four-digit year.
    year = sub(".*\\.", "", year_type),
    # Strip the trailing ".YYYY"; anchored so only the suffix can match.
    type = sub("\\.\\d{4}$", "", year_type)
  ) %>%
  # `year` is a character column: compare it as a number instead of relying
  # on string collation against "1998". The column itself stays character.
  filter(Uncertainty.bounds. == "Median" & as.integer(year) >= 1998) %>%
  select(-c(year_type, Uncertainty.bounds.))
head(median_bound)
##   ISO.Code          CountryName value year type
## 1      AFG          Afghanistan 142.6 1998 U5MR
## 2      AGO               Angola 220.8 1998 U5MR
## 3      ALB              Albania  28.9 1998 U5MR
## 4      AND              Andorra   5.0 1998 U5MR
## 5      ARE United Arab Emirates  11.8 1998 U5MR
## 6      ARG            Argentina  21.4 1998 U5MR

Overall analysis

# Mean of `value` for each measure type, pooled over all countries and years.
by_type <- group_by(median_bound, type)
stats <- summarise(by_type, avg_value = mean(value))
rm(by_type)

head(stats)
## # A tibble: 6 x 2
##   type              avg_value
##   <chr>                 <dbl>
## 1 IMR                    32.1
## 2 Infant.Deaths       29360. 
## 3 Neonatal.Deaths     17090. 
## 4 NMR                    17.2
## 5 U5MR                   45.2
## 6 Under.five.Deaths   40965.

Over the last 20 years, the under-five group (U5MR) has the highest average mortality rate, about 45, whereas the neonatal group (NMR) has the lowest, about 17.

Which countries have the highest and the lowest under-five mortality rate (U5MR)?

# Every country-year U5MR observation, ranked from the highest rate to the
# lowest; head() shows the worst cases and tail() the best.
U5MR <- arrange(
  filter(median_bound, type == "U5MR"),
  desc(value)
)

head(U5MR)
##   ISO.Code  CountryName value year type
## 1      SLE Sierra Leone 246.0 1998 U5MR
## 2      NER        Niger 245.8 1998 U5MR
## 3      SLE Sierra Leone 241.1 1999 U5MR
## 4      NER        Niger 236.4 1999 U5MR
## 5      SLE Sierra Leone 235.8 2000 U5MR
## 6      RWA       Rwanda 234.0 1998 U5MR
tail(U5MR)
##      ISO.Code CountryName value year type
## 3505      ISL     Iceland   2.1 2013 U5MR
## 3506      ISL     Iceland   2.1 2014 U5MR
## 3507      LUX  Luxembourg   2.0 2013 U5MR
## 3508      LUX  Luxembourg   2.0 2014 U5MR
## 3509      ISL     Iceland   2.0 2015 U5MR
## 3510      LUX  Luxembourg   1.9 2015 U5MR

What countries have the highest and the lowest IMR?

# Every country-year IMR observation, ranked from the highest rate to the
# lowest; head() shows the worst cases and tail() the best.
IMR <- arrange(
  filter(median_bound, type == "IMR"),
  desc(value)
)

head(IMR)
##   ISO.Code  CountryName value year type
## 1      SLE Sierra Leone 148.1 1998  IMR
## 2      SLE Sierra Leone 145.8 1999  IMR
## 3      SLE Sierra Leone 143.3 2000  IMR
## 4      SLE Sierra Leone 140.5 2001  IMR
## 5      LBR      Liberia 138.1 1998  IMR
## 6      SLE Sierra Leone 137.7 2002  IMR
tail(IMR)
##      ISO.Code CountryName value year type
## 3505      ISL     Iceland   1.6 2013  IMR
## 3506      LUX  Luxembourg   1.6 2013  IMR
## 3507      ISL     Iceland   1.6 2014  IMR
## 3508      LUX  Luxembourg   1.6 2014  IMR
## 3509      ISL     Iceland   1.6 2015  IMR
## 3510      LUX  Luxembourg   1.5 2015  IMR

What countries have the highest and the lowest NMR?

# Every country-year NMR observation, ranked from the highest rate to the
# lowest; head() shows the worst cases and tail() the best.
NMR <- arrange(
  filter(median_bound, type == "NMR"),
  desc(value)
)

head(NMR)
##   ISO.Code CountryName value year type
## 1      PAK    Pakistan  63.2 1998  NMR
## 2      PAK    Pakistan  61.8 1999  NMR
## 3      MLI        Mali  61.1 1998  NMR
## 4      PAK    Pakistan  60.4 2000  NMR
## 5      SSD South Sudan  60.1 1998  NMR
## 6      MLI        Mali  59.1 1999  NMR
tail(NMR)
##      ISO.Code CountryName value year type
## 3505      SMR  San Marino   0.8 2010  NMR
## 3506      SMR  San Marino   0.8 2011  NMR
## 3507      SMR  San Marino   0.8 2012  NMR
## 3508      SMR  San Marino   0.7 2013  NMR
## 3509      SMR  San Marino   0.7 2014  NMR
## 3510      SMR  San Marino   0.7 2015  NMR

In which year was each of the 3 types of child death at its highest?

# For each measure type, find the year whose cross-country average is highest.
stats_by_year <- median_bound %>%
  group_by(type, year) %>%
  summarise(avg_value = mean(value)) %>%
  # summarise() peels off the `year` grouping, so the data is still grouped by
  # `type` here. Pick the row with the largest average within each type; the
  # row is chosen by value, not by sorting on year, so the result is the true
  # peak year even when that is not the earliest year in the data.
  slice(which.max(avg_value)) %>%
  rename(max_avg_value = avg_value)

stats_by_year
## # A tibble: 6 x 3
## # Groups:   type [6]
##   type              year  max_avg_value
##   <chr>             <chr>         <dbl>
## 1 IMR               1998           41.9
## 2 Infant.Deaths     1998        36468. 
## 3 Neonatal.Deaths   1998        20715. 
## 4 NMR               1998           21.4
## 5 U5MR              1998           61.2
## 6 Under.five.Deaths 1998        52641.

As we can see, 1998 was the worst year for all 3 groups of child death over the last 20 years.

Let’s pick 3 countries: one from the developed countries (United Kingdom), one from the economies in transition (Belarus) and one from the undeveloped countries (Angola), in order to compare child death rates. (The classification was taken from the United Nations Country Classification.)

United Kingdom

# Average of each measure type for the United Kingdom across all kept years.
GB <- filter(median_bound, CountryName == "United Kingdom") %>%
  group_by(type) %>%
  summarise(avg_value = mean(value))

GB
## # A tibble: 6 x 2
##   type              avg_value
##   <chr>                 <dbl>
## 1 IMR                    4.8 
## 2 Infant.Deaths       3617.  
## 3 Neonatal.Deaths     2457.  
## 4 NMR                    3.26
## 5 U5MR                   5.67
## 6 Under.five.Deaths   4264.

Belarus

# Average of each measure type for Belarus across all kept years.
Belarus <- filter(median_bound, CountryName == "Belarus") %>%
  group_by(type) %>%
  summarise(avg_value = mean(value))

Belarus
## # A tibble: 6 x 2
##   type              avg_value
##   <chr>                 <dbl>
## 1 IMR                    7.14
## 2 Infant.Deaths        715.  
## 3 Neonatal.Deaths      419.  
## 4 NMR                    4.22
## 5 U5MR                   9.12
## 6 Under.five.Deaths    907

Angola

# Average of each measure type for Angola across all kept years.
Angola <- filter(median_bound, CountryName == "Angola") %>%
  group_by(type) %>%
  summarise(avg_value = mean(value))

Angola
## # A tibble: 6 x 2
##   type              avg_value
##   <chr>                 <dbl>
## 1 IMR                   116. 
## 2 Infant.Deaths      102518. 
## 3 Neonatal.Deaths     48698. 
## 4 NMR                    54.7
## 5 U5MR                  194. 
## 6 Under.five.Deaths  169552

The results are as expected and show a negative correlation between the child mortality rates and a country’s economic development. (Ignore the absolute death numbers, which also depend on the size of each country’s population.)