Note: Below is the tidying and merging of three wide global datasets from UNICEF. The merged dataset comprises child, infant, and under-five mortality rates from 1950 to 2017 for each country.

Import packages

require(tidyr)
require(dplyr)
require(reshape)
require(stringr)
require(car)
library(ggplot2)

Read in data

# Locations of the three wide-format UNICEF extracts.
childfile <- '/Users/euniceok/PycharmProjects/cuny/spring2019/Week6/data/child1_4mortalityrate.csv'
infantfile <- '/Users/euniceok/PycharmProjects/cuny/spring2019/Week6/data/infant0_1mortalityrate.csv'
und5file <- '/Users/euniceok/PycharmProjects/cuny/spring2019/Week6/data/underfivemortalityrate.csv'

# Read one UNICEF extract.
# - skip = 10 drops the first 10 lines of the file (presumably a metadata
#   preamble above the header row -- confirm against the raw CSVs).
# - Only the first 139 columns are kept (3 id columns + 136 measure-year
#   columns), because the reshaping step gathers columns 4:139 by position.
read_unicef <- function(path) {
  raw <- read.csv(path, skip = 10, header = TRUE)
  raw[seq_len(139)]
}

child <- read_unicef(childfile)

infant <- read_unicef(infantfile)

# und5 is truncated like the other two tables so all three share one layout.
und5 <- read_unicef(und5file)

Transformations

# Before reshaping, print the dimensions (rows, columns) of each raw table;
# all three are expected to have the same 139 columns.
print(dim(child))
## [1] 594 139
print(dim(infant))
## [1] 586 139
print(dim(und5))
## [1] 590 139
# Print the first row of `child` to show the wide layout: three id columns
# (ISO.Code, Country, Uncertainty.bounds.) followed by one column per
# measure and year (e.g. CMR.1950, Child.deaths.1.4years.1950).
print(head(child, n=1))
##   ISO.Code     Country Uncertainty.bounds. CMR.1950 CMR.1951 CMR.1952
## 1      AFG Afghanistan               Lower       NA       NA       NA
##   CMR.1953 CMR.1954 CMR.1955 CMR.1956 CMR.1957 CMR.1958 CMR.1959 CMR.1960
## 1       NA       NA       NA       NA       NA       NA       NA    125.2
##   CMR.1961 CMR.1962 CMR.1963 CMR.1964 CMR.1965 CMR.1966 CMR.1967 CMR.1968
## 1    125.2    124.3      123    120.9    118.7    116.7    114.4    111.9
##   CMR.1969 CMR.1970 CMR.1971 CMR.1972 CMR.1973 CMR.1974 CMR.1975 CMR.1976
## 1    109.5    107.1    105.1    102.7    100.4     98.2     95.7     93.3
##   CMR.1977 CMR.1978 CMR.1979 CMR.1980 CMR.1981 CMR.1982 CMR.1983 CMR.1984
## 1     90.8     88.3     85.7     82.9     80.2     77.5     74.9     72.2
##   CMR.1985 CMR.1986 CMR.1987 CMR.1988 CMR.1989 CMR.1990 CMR.1991 CMR.1992
## 1     69.7     67.1     64.5     61.7     59.1     56.5     53.9     51.6
##   CMR.1993 CMR.1994 CMR.1995 CMR.1996 CMR.1997 CMR.1998 CMR.1999 CMR.2000
## 1     49.3     47.3     45.5     43.9     42.4       41     39.7     38.3
##   CMR.2001 CMR.2002 CMR.2003 CMR.2004 CMR.2005 CMR.2006 CMR.2007 CMR.2008
## 1     36.9     35.4     33.9     32.4       31     29.4     27.7       26
##   CMR.2009 CMR.2010 CMR.2011 CMR.2012 CMR.2013 CMR.2014 CMR.2015 CMR.2016
## 1     24.2     22.4     20.7     18.7       17     15.3     13.9     12.7
##   CMR.2017 Child.deaths.1.4years.1950 Child.deaths.1.4years.1951
## 1     11.6                         NA                         NA
##   Child.deaths.1.4years.1952 Child.deaths.1.4years.1953
## 1                         NA                         NA
##   Child.deaths.1.4years.1954 Child.deaths.1.4years.1955
## 1                         NA                         NA
##   Child.deaths.1.4years.1956 Child.deaths.1.4years.1957
## 1                         NA                         NA
##   Child.deaths.1.4years.1958 Child.deaths.1.4years.1959
## 1                         NA                         NA
##   Child.deaths.1.4years.1960 Child.deaths.1.4years.1961
## 1                         NA                         NA
##   Child.deaths.1.4years.1962 Child.deaths.1.4years.1963
## 1                         NA                         NA
##   Child.deaths.1.4years.1964 Child.deaths.1.4years.1965
## 1                         NA                      45877
##   Child.deaths.1.4years.1966 Child.deaths.1.4years.1967
## 1                      46140                      46431
##   Child.deaths.1.4years.1968 Child.deaths.1.4years.1969
## 1                      46646                      46934
##   Child.deaths.1.4years.1970 Child.deaths.1.4years.1971
## 1                      47165                      47449
##   Child.deaths.1.4years.1972 Child.deaths.1.4years.1973
## 1                      47667                      47855
##   Child.deaths.1.4years.1974 Child.deaths.1.4years.1975
## 1                      48043                      48036
##   Child.deaths.1.4years.1976 Child.deaths.1.4years.1977
## 1                      47997                      47772
##   Child.deaths.1.4years.1978 Child.deaths.1.4years.1979
## 1                      47380                      46658
##   Child.deaths.1.4years.1980 Child.deaths.1.4years.1981
## 1                      45658                      44430
##   Child.deaths.1.4years.1982 Child.deaths.1.4years.1983
## 1                      43043                      41492
##   Child.deaths.1.4years.1984 Child.deaths.1.4years.1985
## 1                      39732                      37916
##   Child.deaths.1.4years.1986 Child.deaths.1.4years.1987
## 1                      36079                      34303
##   Child.deaths.1.4years.1988 Child.deaths.1.4years.1989
## 1                      32611                      31163
##   Child.deaths.1.4years.1990 Child.deaths.1.4years.1991
## 1                      29952                      29066
##   Child.deaths.1.4years.1992 Child.deaths.1.4years.1993
## 1                      28517                      28265
##   Child.deaths.1.4years.1994 Child.deaths.1.4years.1995
## 1                      28290                      28584
##   Child.deaths.1.4years.1996 Child.deaths.1.4years.1997
## 1                      29036                      29601
##   Child.deaths.1.4years.1998 Child.deaths.1.4years.1999
## 1                      30167                      30704
##   Child.deaths.1.4years.2000 Child.deaths.1.4years.2001
## 1                      31056                      31225
##   Child.deaths.1.4years.2002 Child.deaths.1.4years.2003
## 1                      31141                      30859
##   Child.deaths.1.4years.2004 Child.deaths.1.4years.2005
## 1                      30412                      29812
##   Child.deaths.1.4years.2006 Child.deaths.1.4years.2007
## 1                      29004                      27947
##   Child.deaths.1.4years.2008 Child.deaths.1.4years.2009
## 1                      26691                      25274
##   Child.deaths.1.4years.2010 Child.deaths.1.4years.2011
## 1                      23685                      21932
##   Child.deaths.1.4years.2012 Child.deaths.1.4years.2013
## 1                      20025                      18158
##   Child.deaths.1.4years.2014 Child.deaths.1.4years.2015
## 1                      16430                      14963
##   Child.deaths.1.4years.2016 Child.deaths.1.4years.2017
## 1                      13735                      12581
#print(head(infant, n=1))
#print(head(und5, n=1))
# Bind the 3 datasets together - child, infant, und5
datasets <- list(child, infant, und5)

# Reshape one wide table into long format.
#
# Steps:
#   1. keep only the rows whose Uncertainty.bounds. is 'Median';
#   2. gather columns 4:139 (the measure-year columns) into Years / Value;
#   3. split the gathered column name into Type (everything except the
#      trailing ".YYYY") and Year (the last four characters);
#   4. drop the helper/id columns that are no longer needed.
# Country is converted to character up front so that binding tables whose
# Country factors have different levels does not rely on implicit coercion.
# Returns a data frame with columns Country, Value, Type, Year.
tidy_mortality <- function(df) {
  long <- df %>%
    filter(Uncertainty.bounds. == 'Median') %>%
    gather(Years, Value, 4:139)
  long$Country <- as.character(long$Country)
  long$Type <- str_sub(long$Years, end = -6)
  long$Year <- str_sub(long$Years, start = -4, end = -1)
  select(long, -Uncertainty.bounds., -Years, -ISO.Code)
}

# Tidy each table independently, then bind once instead of growing `master`
# inside a loop.
master <- bind_rows(lapply(datasets, tidy_mortality))
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
# Sanity-check the combined long table: overall dimensions, then the first
# and last rows, to confirm that rows from the first (child) and last (und5)
# datasets are both present.
print(dim(master))
## [1] 79560     4
print(head(master))
##                Country Value Type Year
## 1          Afghanistan    NA  CMR 1950
## 2               Angola    NA  CMR 1950
## 3              Albania    NA  CMR 1950
## 4              Andorra    NA  CMR 1950
## 5 United Arab Emirates    NA  CMR 1950
## 6            Argentina    NA  CMR 1950
print(tail(master))
##            Country Value              Type Year
## 79555      Vanuatu   188 Under.five.Deaths 2017
## 79556        Samoa    79 Under.five.Deaths 2017
## 79557        Yemen 47966 Under.five.Deaths 2017
## 79558 South Africa 43254 Under.five.Deaths 2017
## 79559       Zambia 37604 Under.five.Deaths 2017
## 79560     Zimbabwe 26663 Under.five.Deaths 2017
# plyr is attached only for revalue() and detached again at the end of this
# step, since plyr and dplyr export several same-named functions.
library(plyr)
# Check out unique types 
print(unique(master$Type))
## [1] "CMR"                   "Child.deaths.1.4years" "IMR"                  
## [4] "Infant.Deaths"         "U5MR"                  "Under.five.Deaths"
# Recode all types consistently: NewType is the age band only
# (CMR = child 1-4, IMR = infant, U5MR = under five). Values not listed in
# the mapping ("CMR", "IMR", "U5MR") are left unchanged by revalue().
master$NewType <- revalue(master$Type, c("Child.deaths.1.4years" = "CMR",
                                         "Infant.Deaths" = "IMR",
                                         "Under.five.Deaths" = "U5MR"))

# Each age band arrives as two different measures: the *MR series
# (presumably rates -- confirm units against the UNICEF documentation) and
# the *Deaths series (absolute counts). NewType alone gives both the same
# label, so record which measure a row holds BEFORE Type is dropped;
# otherwise rates and counts can no longer be told apart and any mean/sum of
# Value per NewType mixes units.
death_count_types <- c("Child.deaths.1.4years", "Infant.Deaths",
                       "Under.five.Deaths")
master$Measure <- ifelse(master$Type %in% death_count_types, "Deaths", "Rate")

# Check out unique types again
print(unique(master$NewType))
## [1] "CMR"  "IMR"  "U5MR"
master <- select(master, -Type)
detach(package:plyr)
# Check out the classes of each column
sapply(master, class)
##     Country       Value        Year     NewType     Measure 
## "character"   "numeric" "character" "character" "character"

Analysis

Note on Type and Values

# Keep only the rows in which every column is populated; in practice this
# drops country/year/type combinations that have no recorded Value.
# This shrinks the dataset by 17.5K rows
# print(dim(master))
has_all_values <- complete.cases(master)
newmaster <- master[has_all_values, ]
# print(dim(newmaster))
print(tail(newmaster, n = 10))
##                                    Country Value Year NewType
## 79551                           Uzbekistan 14702 2017    U5MR
## 79552     Saint Vincent and the Grenadines    27 2017    U5MR
## 79553 Venezuela (Bolivarian Republic of)** 18499 2017    U5MR
## 79554                             Viet Nam 32790 2017    U5MR
## 79555                              Vanuatu   188 2017    U5MR
## 79556                                Samoa    79 2017    U5MR
## 79557                                Yemen 47966 2017    U5MR
## 79558                         South Africa 43254 2017    U5MR
## 79559                               Zambia 37604 2017    U5MR
## 79560                             Zimbabwe 26663 2017    U5MR

How do infant and child and under-five mortality rates compare with each other globally?

# Mean and total of Value for each NewType, over all countries and years.
# NOTE(review): the recode step maps both the *MR series and the *Deaths
# series onto the same NewType label, so Value here appears to mix two
# different measures within each group -- confirm before interpreting these
# figures as death counts or as rates.
summarise(
  group_by(newmaster, NewType),
  mean_mortality = mean(Value),
  total_mortality = sum(Value)
)
## # A tibble: 3 x 3
##   NewType mean_mortality total_mortality
##   <chr>            <dbl>           <dbl>
## 1 CMR              9675.      197493213.
## 2 IMR             23488.      497779222.
## 3 U5MR            31727.      647640459.

There were a total of roughly 500 million infant deaths in the world, and 23.5K on average per country, in the past 67 years. Although infant mortality is only measured across one year, there is, on average, a 2.5 times higher likelihood that a child will die between birth and 1 year of age than between the ages of 1 and 4.

# Decade snapshots used for the under-five trend.
decade_years <- c('1950', '1960', '1970', '1980', '1990', '2000', '2010')

# Per-type, per-year mean and total of Value, restricted to the U5MR type
# and the decade years above. The result stays grouped by NewType.
u5mragg <- newmaster %>%
  group_by(NewType, Year) %>%
  summarise(
    mean_mortality = mean(Value),
    total_mortality = sum(Value)
  ) %>%
  filter(Year %in% decade_years, NewType == 'U5MR')
u5mragg
## # A tibble: 7 x 4
## # Groups:   NewType [1]
##   NewType Year  mean_mortality total_mortality
##   <chr>   <chr>          <dbl>           <dbl>
## 1 U5MR    1950            169.           5582.
## 2 U5MR    1960          56525.       10061478.
## 3 U5MR    1970          45345.       11744328.
## 4 U5MR    1980          42221.       13679596.
## 5 U5MR    1990          32297.       12595808.
## 6 U5MR    2000          25216.        9834329.
## 7 U5MR    2010          17948.        6999758.
# Both trend charts share the same geometry: a line with a point per year.
trend_layers <- list(geom_line(), geom_point())

# mean under 5 mortality globally over time
ggplot(u5mragg, aes(x = Year, y = mean_mortality, group = 1)) +
  trend_layers

# total under 5 mortality globally over time
ggplot(u5mragg, aes(x = Year, y = total_mortality, group = 1)) +
  trend_layers

# count of countries included in evaluation year over year
# A country can contribute several rows per year (one per type/measure), so
# reduce to unique Year/Country pairs first; counting raw rows would report
# the number of records rather than the number of countries.
ctyear <- dplyr::count(dplyr::distinct(newmaster, Year, Country), Year)

# n = number of distinct countries with at least one non-missing value
ggplot(data = ctyear, aes(x = Year, y = n, group = 1)) +
  geom_line() +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))

Conclusion

The under-five mortality rate (U5M) encompasses both IMR and CMR and is therefore a general metric that is a good proxy for both. On average, the U5M appears to be decreasing over the years globally. Though it is odd that the total incidents of U5M peak in 1980, this is due to the fact that the number of countries included in the dataset increased steadily until 1980. After that point, it held steady. This supports the conclusion extracted from both the mean and total graphs that the U5M rate is decreasing.

Obviously, this analysis is very high level and there is opportunity to dig into trends of individual countries, regions, etc. In addition, joining this dataset with other global economic and health indicators would open up further doors to digging into the possible causes of the global decrease in U5M rate and – I suspect – big differences across countries and regions.