Note: Below is the tidying and merging of three wide global datasets from UNICEF. The merged dataset comprises child, infant, and under-five mortality rates from 1950 to 2017 for each country.
require(tidyr)
require(dplyr)
require(reshape)
require(stringr)
require(car)
library(ggplot2)
# Paths to the three wide UNICEF exports (child 1-4, infant 0-1, under-five).
data_dir <- "/Users/euniceok/PycharmProjects/cuny/spring2019/Week6/data"
childfile <- file.path(data_dir, "child1_4mortalityrate.csv")
infantfile <- file.path(data_dir, "infant0_1mortalityrate.csv")
und5file <- file.path(data_dir, "underfivemortalityrate.csv")

# Read one export: skip the 10 lines that precede the header row and keep only
# the first `n_cols` columns. All three tables go through the same reader so
# they end up with the same layout (previously `und5` was left untrimmed).
# Strings are read as character rather than factor so that the tables can be
# stacked later without factor-level coercion warnings.
read_unicef <- function(path, n_cols = 139) {
  wide <- read.csv(path, skip = 10, header = TRUE, stringsAsFactors = FALSE)
  wide[seq_len(n_cols)]
}

child <- read_unicef(childfile)
infant <- read_unicef(infantfile)
und5 <- read_unicef(und5file)
# Report (rows, columns) for each wide table before reshaping
print(c(nrow(child), ncol(child)))
## [1] 594 139
print(c(nrow(infant), ncol(infant)))
## [1] 586 139
print(c(nrow(und5), ncol(und5)))
## [1] 590 139
# Preview the first row of the child table to see the wide layout
print(head(child, n = 1L))
## ISO.Code Country Uncertainty.bounds. CMR.1950 CMR.1951 CMR.1952
## 1 AFG Afghanistan Lower NA NA NA
## CMR.1953 CMR.1954 CMR.1955 CMR.1956 CMR.1957 CMR.1958 CMR.1959 CMR.1960
## 1 NA NA NA NA NA NA NA 125.2
## CMR.1961 CMR.1962 CMR.1963 CMR.1964 CMR.1965 CMR.1966 CMR.1967 CMR.1968
## 1 125.2 124.3 123 120.9 118.7 116.7 114.4 111.9
## CMR.1969 CMR.1970 CMR.1971 CMR.1972 CMR.1973 CMR.1974 CMR.1975 CMR.1976
## 1 109.5 107.1 105.1 102.7 100.4 98.2 95.7 93.3
## CMR.1977 CMR.1978 CMR.1979 CMR.1980 CMR.1981 CMR.1982 CMR.1983 CMR.1984
## 1 90.8 88.3 85.7 82.9 80.2 77.5 74.9 72.2
## CMR.1985 CMR.1986 CMR.1987 CMR.1988 CMR.1989 CMR.1990 CMR.1991 CMR.1992
## 1 69.7 67.1 64.5 61.7 59.1 56.5 53.9 51.6
## CMR.1993 CMR.1994 CMR.1995 CMR.1996 CMR.1997 CMR.1998 CMR.1999 CMR.2000
## 1 49.3 47.3 45.5 43.9 42.4 41 39.7 38.3
## CMR.2001 CMR.2002 CMR.2003 CMR.2004 CMR.2005 CMR.2006 CMR.2007 CMR.2008
## 1 36.9 35.4 33.9 32.4 31 29.4 27.7 26
## CMR.2009 CMR.2010 CMR.2011 CMR.2012 CMR.2013 CMR.2014 CMR.2015 CMR.2016
## 1 24.2 22.4 20.7 18.7 17 15.3 13.9 12.7
## CMR.2017 Child.deaths.1.4years.1950 Child.deaths.1.4years.1951
## 1 11.6 NA NA
## Child.deaths.1.4years.1952 Child.deaths.1.4years.1953
## 1 NA NA
## Child.deaths.1.4years.1954 Child.deaths.1.4years.1955
## 1 NA NA
## Child.deaths.1.4years.1956 Child.deaths.1.4years.1957
## 1 NA NA
## Child.deaths.1.4years.1958 Child.deaths.1.4years.1959
## 1 NA NA
## Child.deaths.1.4years.1960 Child.deaths.1.4years.1961
## 1 NA NA
## Child.deaths.1.4years.1962 Child.deaths.1.4years.1963
## 1 NA NA
## Child.deaths.1.4years.1964 Child.deaths.1.4years.1965
## 1 NA 45877
## Child.deaths.1.4years.1966 Child.deaths.1.4years.1967
## 1 46140 46431
## Child.deaths.1.4years.1968 Child.deaths.1.4years.1969
## 1 46646 46934
## Child.deaths.1.4years.1970 Child.deaths.1.4years.1971
## 1 47165 47449
## Child.deaths.1.4years.1972 Child.deaths.1.4years.1973
## 1 47667 47855
## Child.deaths.1.4years.1974 Child.deaths.1.4years.1975
## 1 48043 48036
## Child.deaths.1.4years.1976 Child.deaths.1.4years.1977
## 1 47997 47772
## Child.deaths.1.4years.1978 Child.deaths.1.4years.1979
## 1 47380 46658
## Child.deaths.1.4years.1980 Child.deaths.1.4years.1981
## 1 45658 44430
## Child.deaths.1.4years.1982 Child.deaths.1.4years.1983
## 1 43043 41492
## Child.deaths.1.4years.1984 Child.deaths.1.4years.1985
## 1 39732 37916
## Child.deaths.1.4years.1986 Child.deaths.1.4years.1987
## 1 36079 34303
## Child.deaths.1.4years.1988 Child.deaths.1.4years.1989
## 1 32611 31163
## Child.deaths.1.4years.1990 Child.deaths.1.4years.1991
## 1 29952 29066
## Child.deaths.1.4years.1992 Child.deaths.1.4years.1993
## 1 28517 28265
## Child.deaths.1.4years.1994 Child.deaths.1.4years.1995
## 1 28290 28584
## Child.deaths.1.4years.1996 Child.deaths.1.4years.1997
## 1 29036 29601
## Child.deaths.1.4years.1998 Child.deaths.1.4years.1999
## 1 30167 30704
## Child.deaths.1.4years.2000 Child.deaths.1.4years.2001
## 1 31056 31225
## Child.deaths.1.4years.2002 Child.deaths.1.4years.2003
## 1 31141 30859
## Child.deaths.1.4years.2004 Child.deaths.1.4years.2005
## 1 30412 29812
## Child.deaths.1.4years.2006 Child.deaths.1.4years.2007
## 1 29004 27947
## Child.deaths.1.4years.2008 Child.deaths.1.4years.2009
## 1 26691 25274
## Child.deaths.1.4years.2010 Child.deaths.1.4years.2011
## 1 23685 21932
## Child.deaths.1.4years.2012 Child.deaths.1.4years.2013
## 1 20025 18158
## Child.deaths.1.4years.2014 Child.deaths.1.4years.2015
## 1 16430 14963
## Child.deaths.1.4years.2016 Child.deaths.1.4years.2017
## 1 13735 12581
#print(head(infant, n=1))
#print(head(und5, n=1))
# Bind the 3 datasets together - child, infant, und5

# Reshape one wide table into long form with columns Country, Value, Type, Year.
tidy_mortality <- function(wide) {
  # Keep only the median estimate; other uncertainty-bound rows (e.g. "Lower")
  # are dropped.
  long <- filter(wide, Uncertainty.bounds. == "Median")
  # Columns 4:139 hold the per-year measures (names such as "CMR.1950").
  long <- gather(long, Years, Value, 4:139)
  # Force character so stacking tables with different country sets does not
  # trigger factor-level coercion warnings.
  long$Country <- as.character(long$Country)
  # Measure label = column name without the trailing ".YYYY".
  long$Type <- str_sub(long$Years, end = -6)
  # Year = last four characters of the column name (kept as character).
  long$Year <- str_sub(long$Years, start = -4, end = -1)
  select(long, -Uncertainty.bounds., -Years, -ISO.Code)
}

datasets <- list(child, infant, und5)
# Tidy each table, then stack once instead of growing `master` inside a loop.
master <- bind_rows(lapply(datasets, tidy_mortality))
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
# Sanity-check the stacked table: overall shape, then its first and last rows
print(c(nrow(master), ncol(master)))
## [1] 79560 4
print(head(master, n = 6L))
## Country Value Type Year
## 1 Afghanistan NA CMR 1950
## 2 Angola NA CMR 1950
## 3 Albania NA CMR 1950
## 4 Andorra NA CMR 1950
## 5 United Arab Emirates NA CMR 1950
## 6 Argentina NA CMR 1950
print(tail(master, n = 6L))
## Country Value Type Year
## 79555 Vanuatu 188 Under.five.Deaths 2017
## 79556 Samoa 79 Under.five.Deaths 2017
## 79557 Yemen 47966 Under.five.Deaths 2017
## 79558 South Africa 43254 Under.five.Deaths 2017
## 79559 Zambia 37604 Under.five.Deaths 2017
## 79560 Zimbabwe 26663 Under.five.Deaths 2017
# Check out unique types
print(unique(master$Type))
## [1] "CMR" "Child.deaths.1.4years" "IMR"
## [4] "Infant.Deaths" "U5MR" "Under.five.Deaths"
# Recode all types consistently.
# A base-R lookup replaces plyr::revalue, so plyr no longer has to be attached
# (which masks dplyr verbs) and detached again.
type_to_group <- c(
  "Child.deaths.1.4years" = "CMR",
  "Infant.Deaths" = "IMR",
  "Under.five.Deaths" = "U5MR",
  "CMR" = "CMR",
  "IMR" = "IMR",
  "U5MR" = "U5MR"
)
type_chr <- as.character(master$Type)
master$NewType <- unname(type_to_group[type_chr])
# Labels missing from the lookup are kept unchanged, as revalue() would do.
unmapped <- is.na(master$NewType)
master$NewType[unmapped] <- type_chr[unmapped]
# NewType puts rate columns (CMR/IMR/U5MR) and death-count columns under the
# same label; record which kind each row is before Type is dropped so the two
# can still be separated downstream.
master$Measure <- ifelse(type_chr %in% c("CMR", "IMR", "U5MR"), "Rate", "Deaths")
# Check out unique types again
print(unique(master$NewType))
## [1] "CMR" "IMR" "U5MR"
master <- select(master, -Type)
# Check out the classes of each column
sapply(master, class)
## Country Value Year NewType
## "character" "numeric" "character" "character"
Note on Type and Values
IMR is the infant mortality rate (Probability of dying between birth and exactly 1 year of age, expressed per 1,000 live births)
CMR is the child mortality rate (Probability of dying between 1 and 4 years of age, expressed per 1,000 children age 1)
U5MR is the under-five mortality rate (Probability of dying between birth and exactly 5 years of age, expressed per 1,000 live births)
# Keep only rows with no missing entry in any column, i.e. drop country/year
# combinations that have no recorded value.
# This shrinks the dataset by 17.5K rows
# print(dim(master))
newmaster <- master[which(complete.cases(master)), ]
# print(dim(newmaster))
print(tail(newmaster, n = 10L))
## Country Value Year NewType
## 79551 Uzbekistan 14702 2017 U5MR
## 79552 Saint Vincent and the Grenadines 27 2017 U5MR
## 79553 Venezuela (Bolivarian Republic of)** 18499 2017 U5MR
## 79554 Viet Nam 32790 2017 U5MR
## 79555 Vanuatu 188 2017 U5MR
## 79556 Samoa 79 2017 U5MR
## 79557 Yemen 47966 2017 U5MR
## 79558 South Africa 43254 2017 U5MR
## 79559 Zambia 37604 2017 U5MR
## 79560 Zimbabwe 26663 2017 U5MR
How do infant and child and under-five mortality rates compare with each other globally?
# Mean and sum of Value for each recoded type (CMR / IMR / U5MR).
# NOTE(review): Value appears to mix per-1,000 rates with absolute death
# counts, because the earlier recode maps e.g. "Infant.Deaths" onto "IMR" --
# confirm that combining them in one mean/sum is intended.
summarise(
  group_by(newmaster, NewType),
  mean_mortality = mean(Value),
  total_mortality = sum(Value)
)
## # A tibble: 3 x 3
## NewType mean_mortality total_mortality
## <chr> <dbl> <dbl>
## 1 CMR 9675. 197493213.
## 2 IMR 23488. 497779222.
## 3 U5MR 31727. 647640459.
There were a total of roughly 500 million infant deaths worldwide, and about 23.5K on average per country, over the past 67 years. Although infant mortality covers only a single year of life, a child is, on average, about 2.5 times more likely to die between birth and 1 year of age than between the ages of 1 and 4.
# Under-five aggregates at the start of each decade: mean and sum of Value per
# year. Rows are narrowed to U5MR and the decade years first, then aggregated.
# NOTE(review): Value appears to mix per-1,000 rates with absolute death
# counts under the "U5MR" label -- confirm before interpreting these figures.
decade_years <- c('1950', '1960', '1970', '1980', '1990', '2000', '2010')
u5mragg <- newmaster %>%
  filter(NewType == 'U5MR', Year %in% decade_years) %>%
  group_by(NewType, Year) %>%
  summarise(mean_mortality = mean(Value), total_mortality = sum(Value))
u5mragg
## # A tibble: 7 x 4
## # Groups: NewType [1]
## NewType Year mean_mortality total_mortality
## <chr> <chr> <dbl> <dbl>
## 1 U5MR 1950 169. 5582.
## 2 U5MR 1960 56525. 10061478.
## 3 U5MR 1970 45345. 11744328.
## 4 U5MR 1980 42221. 13679596.
## 5 U5MR 1990 32297. 12595808.
## 6 U5MR 2000 25216. 9834329.
## 7 U5MR 2010 17948. 6999758.
# Line-and-point chart of the per-decade mean of the under-five values.
# group = 1 joins the points into one line even though Year is a character
# (discrete) axis.
ggplot(
  data = u5mragg,
  mapping = aes(x = Year, y = mean_mortality, group = 1)
) +
  geom_line() +
  geom_point()
# Same chart for the per-decade total of the under-five values.
ggplot(
  data = u5mragg,
  mapping = aes(x = Year, y = total_mortality, group = 1)
) +
  geom_line() +
  geom_point()
# count of countries included in evaluation year over year
# Count distinct countries per year. Counting rows instead would tally every
# country once per measure present that year, overstating coverage.
ctyear <- newmaster %>%
  group_by(Year) %>%
  summarise(n = n_distinct(Country))
ggplot(data = ctyear, aes(x = Year, y = n, group = 1)) +
  geom_line() +
  geom_point() +
  # Rotate the year labels so all of them fit on the axis.
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
The under-five mortality rate (U5M) encompasses both IMR and CMR and is therefore a general metric that serves as a good proxy for both. On average, the U5M appears to be decreasing globally over the years. Although it is odd that total U5M incidents peak in 1980, this is because the number of countries included in the dataset increased steadily until 1980 and held steady after that point. This supports the conclusion, drawn from both the mean and total graphs, that the U5M rate is decreasing.
Obviously, this analysis is very high level and there is opportunity to dig into trends of individual countries, regions, etc. In addition, joining this dataset with other global economic and health indicators would open up further doors to digging into the possible causes of the global decrease in U5M rate and – I suspect – big differences across countries and regions.