Progress has been made worldwide in reducing maternal and child morbidity and mortality. However, rates are still high. According to the World Health Organization (WHO), approximately 810 women died every day in 2017 from preventable causes related to pregnancy and childbirth (WHO, 2019).
Maternal mortality rates for most countries result from complex socio-economic forces, including a lack of access to skilled care during pregnancy and childbirth and a lack of access to safe abortions, even where legal. Poverty, low status of women, lack of education, poor nutrition, heavy workloads and violence are all risk factors for poor outcomes for pregnant women.
To further explore those rates and form insights on the maternal and child health situation worldwide, we will analyze data from the World Bank Indicators (WBI) on maternal, neonatal and infant mortality.
Maternal Mortality Ratio (MMR) : number of women who die from pregnancy-related causes while pregnant or within 42 days of pregnancy termination per 100,000 live births.
Neonatal Mortality Rate (NMR) : the number of neonates dying before reaching 28 days of age, per 1,000 live births in a given year.
Infant Mortality Rate (IMR) : number of infants dying before reaching one year of age, per 1,000 live births in a given year.
# Load the packages
library(readr) # reading
library(plyr)
library(dplyr) # manipulating data
library(tidyr) # pivoting data and dropping missing values
library(janitor) # cleaning column names
library(ggplot2) # plotting data
library(tidyverse)
library(ggthemes) # adding themes to plots
library(WDI)
# Download the neonatal, maternal and infant mortality series for every
# country/aggregate from the World Bank via WDI(), covering 2000-2020.
# extra = TRUE also returns descriptive fields such as region, income group
# and capital coordinates.
raw_data <- WDI(
  country = "all",
  indicator = c(
    "Mortality_rate_neonatal" = "SH.DYN.NMRT",
    "Maternal_mortality_ratio" = "SH.STA.MMRT",
    "Mortality_rate_infant" = "SP.DYN.IMRT.IN",
    "Mortality_rate_infant_male" = "SP.DYN.IMRT.MA.IN",
    "Mortality_rate_infant_female" = "SP.DYN.IMRT.FE.IN"
  ),
  start = 2000,
  end = 2020,
  extra = TRUE
)
# Keep a local copy of the download. write.csv() is the write.table() wrapper
# that fixes sep = ",", qmethod = "double" and a blank header cell above the
# row names (col.names = NA).
write.csv(raw_data, file = "WDI.csv")
# Name of the local CSV copy of the WDI download.
filename <- "WDI.csv"
# Load the CSV file from the local directory. TRUE is spelled out because `T`
# is an ordinary variable that can be reassigned, while TRUE is reserved.
raw_data <- read.csv(filename, header = TRUE, sep = ",")
# Display the first five rows.
head(raw_data, n = 5)
X iso2c country year status Mortality_rate_neonatal
1 1 1A Arab World 2000 NA 25.43366
2 2 1A Arab World 2001 NA 24.93282
3 3 1A Arab World 2004 NA 23.37480
4 4 1A Arab World 2005 NA 22.81931
5 5 1A Arab World 2002 NA 24.44103
Maternal_mortality_ratio Mortality_rate_infant Mortality_rate_infant_male
1 250 44.09257 47.61277
2 243 42.80036 46.24128
3 226 39.03268 42.17217
4 214 37.82395 40.86584
5 235 41.50109 44.89622
Mortality_rate_infant_female iso3c region capital longitude latitude
1 40.37474 ARB Aggregates NA NA
2 39.16552 ARB Aggregates NA NA
3 35.73251 ARB Aggregates NA NA
4 34.61242 ARB Aggregates NA NA
5 37.94841 ARB Aggregates NA NA
income lending
1 Aggregates Aggregates
2 Aggregates Aggregates
3 Aggregates Aggregates
4 Aggregates Aggregates
5 Aggregates Aggregates
# List the column names of the loaded data frame.
colnames(raw_data)
[1] "X" "iso2c"
[3] "country" "year"
[5] "status" "Mortality_rate_neonatal"
[7] "Maternal_mortality_ratio" "Mortality_rate_infant"
[9] "Mortality_rate_infant_male" "Mortality_rate_infant_female"
[11] "iso3c" "region"
[13] "capital" "longitude"
[15] "latitude" "income"
[17] "lending"
# List the class of each column as a named character vector.
# vapply() guarantees one string per column; sapply() would silently switch to
# a list as soon as a column carried more than one class. Columns with several
# classes are collapsed into a single "a/b" label.
vapply(
  raw_data,
  function(column) paste(class(column), collapse = "/"),
  character(1)
)
X iso2c
"integer" "character"
country year
"character" "integer"
status Mortality_rate_neonatal
"logical" "numeric"
Maternal_mortality_ratio Mortality_rate_infant
"integer" "numeric"
Mortality_rate_infant_male Mortality_rate_infant_female
"numeric" "numeric"
iso3c region
"character" "character"
capital longitude
"character" "numeric"
latitude income
"numeric" "character"
lending
"character"
# Remove the unnecessary rows: anything whose region is labelled "Aggregates".
# Rows with a missing region do not match the pattern and are therefore kept.
df <- raw_data[!grepl("Aggregates", raw_data$region), ]
# Show the first five remaining rows.
head(df, n = 5)
X iso2c country year status Mortality_rate_neonatal
106 106 AD Andorra 2006 NA 2.7
107 107 AD Andorra 2002 NA 3.3
108 108 AD Andorra 2003 NA 3.2
109 109 AD Andorra 2005 NA 2.9
110 110 AD Andorra 2019 NA 1.4
Maternal_mortality_ratio Mortality_rate_infant Mortality_rate_infant_male
106 NA 5.3 5.8
107 NA 6.5 7.2
108 NA 6.2 6.9
109 NA 5.6 6.2
110 NA 2.5 2.7
Mortality_rate_infant_female iso3c region capital
106 4.7 AND Europe & Central Asia Andorra la Vella
107 5.8 AND Europe & Central Asia Andorra la Vella
108 5.5 AND Europe & Central Asia Andorra la Vella
109 4.9 AND Europe & Central Asia Andorra la Vella
110 2.2 AND Europe & Central Asia Andorra la Vella
longitude latitude income lending
106 1.5218 42.5075 High income Not classified
107 1.5218 42.5075 High income Not classified
108 1.5218 42.5075 High income Not classified
109 1.5218 42.5075 High income Not classified
110 1.5218 42.5075 High income Not classified
# Reshape to long format: the five indicator columns are stacked so that each
# row holds one country, year and indicator, with the indicator name in
# `indicator` and its reading in `value`.
df1 <- df %>%
  pivot_longer(
    cols = c(
      Mortality_rate_neonatal,
      Maternal_mortality_ratio,
      Mortality_rate_infant,
      Mortality_rate_infant_male,
      Mortality_rate_infant_female
    ),
    names_to = "indicator",
    values_to = "value"
  ) %>%
  # Keep only the necessary columns.
  select(country, year, indicator, value, region, longitude, latitude) %>%
  arrange(country, indicator, year)
head(df1, n = 17)
# A tibble: 17 x 7
country year indicator value region longitude latitude
<chr> <int> <chr> <dbl> <chr> <dbl> <dbl>
1 Afghanistan 2000 Maternal_mortality_ratio 1450 South As~ 69.2 34.5
2 Afghanistan 2001 Maternal_mortality_ratio 1390 South As~ 69.2 34.5
3 Afghanistan 2002 Maternal_mortality_ratio 1300 South As~ 69.2 34.5
4 Afghanistan 2003 Maternal_mortality_ratio 1240 South As~ 69.2 34.5
5 Afghanistan 2004 Maternal_mortality_ratio 1180 South As~ 69.2 34.5
6 Afghanistan 2005 Maternal_mortality_ratio 1140 South As~ 69.2 34.5
7 Afghanistan 2006 Maternal_mortality_ratio 1120 South As~ 69.2 34.5
8 Afghanistan 2007 Maternal_mortality_ratio 1090 South As~ 69.2 34.5
9 Afghanistan 2008 Maternal_mortality_ratio 1030 South As~ 69.2 34.5
10 Afghanistan 2009 Maternal_mortality_ratio 993 South As~ 69.2 34.5
11 Afghanistan 2010 Maternal_mortality_ratio 954 South As~ 69.2 34.5
12 Afghanistan 2011 Maternal_mortality_ratio 905 South As~ 69.2 34.5
13 Afghanistan 2012 Maternal_mortality_ratio 858 South As~ 69.2 34.5
14 Afghanistan 2013 Maternal_mortality_ratio 810 South As~ 69.2 34.5
15 Afghanistan 2014 Maternal_mortality_ratio 786 South As~ 69.2 34.5
16 Afghanistan 2015 Maternal_mortality_ratio 701 South As~ 69.2 34.5
17 Afghanistan 2016 Maternal_mortality_ratio 673 South As~ 69.2 34.5
# List each distinct indicator name once, in order of first appearance.
distinct(df1, indicator)
# A tibble: 5 x 1
indicator
<chr>
1 Maternal_mortality_ratio
2 Mortality_rate_infant
3 Mortality_rate_infant_female
4 Mortality_rate_infant_male
5 Mortality_rate_neonatal
# List each distinct region once, in order of first appearance; a missing
# region shows up as its own NA row.
distinct(df1, region)
# A tibble: 8 x 1
region
<chr>
1 South Asia
2 Europe & Central Asia
3 Middle East & North Africa
4 East Asia & Pacific
5 Sub-Saharan Africa
6 Latin America & Caribbean
7 North America
8 <NA>
# Mean of each indicator per year and region.
# NOTE(review): drop_na() is called without column names, so a row is removed
# when ANY of its columns is NA (value, region, longitude or latitude), not
# only when the indicator value is missing -- confirm this is intended.
region_mortality <- df1 %>%
  drop_na() %>%
  group_by(year, indicator, region) %>%
  summarise(mean_mortality = mean(value))
`summarise()` has grouped output by 'year', 'indicator'. You can override using the `.groups` argument.
# Preview the first six rows of the regional means.
head(region_mortality, n = 6)
# A tibble: 6 x 4
# Groups: year, indicator [1]
year indicator region mean_mortality
<int> <chr> <chr> <dbl>
1 2000 Maternal_mortality_ratio East Asia & Pacific 164.
2 2000 Maternal_mortality_ratio Europe & Central Asia 21.2
3 2000 Maternal_mortality_ratio Latin America & Caribbean 110.
4 2000 Maternal_mortality_ratio Middle East & North Africa 86.2
5 2000 Maternal_mortality_ratio North America 10.5
6 2000 Maternal_mortality_ratio South Asia 462.
# compute yearly means and create data frame
world_mortality <- df1 %>%
group_by(year, indicator) %>%
drop_na() %>%
summarise(mean_mortality = mean(value))
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
# Preview the last six rows of the yearly means.
tail(world_mortality, n = 6)
# A tibble: 6 x 3
# Groups: year [2]
year indicator mean_mortality
<int> <chr> <dbl>
1 2019 Mortality_rate_infant_male 23.0
2 2019 Mortality_rate_neonatal 12.7
3 2020 Mortality_rate_infant 20.4
4 2020 Mortality_rate_infant_female 18.4
5 2020 Mortality_rate_infant_male 22.4
6 2020 Mortality_rate_neonatal 12.4
# Yearly mean of every indicator, shown wide: one row per year and one column
# per indicator. Rows with an NA in any column are dropped first.
df1 %>%
  drop_na() %>%
  group_by(year, indicator) %>%
  summarise(mean_value = mean(value)) %>%
  pivot_wider(names_from = indicator, values_from = mean_value)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
# A tibble: 21 x 6
# Groups: year [21]
year Maternal_mortali~ Mortality_rate_i~ Mortality_rate_i~ Mortality_rate_i~
<int> <dbl> <dbl> <dbl> <dbl>
1 2000 276. 38.5 35.0 41.8
2 2001 267. 37.1 33.7 40.3
3 2002 260. 35.8 32.5 38.9
4 2003 252. 34.5 31.3 37.5
5 2004 242. 33.3 30.2 36.3
6 2005 231. 32.1 29.0 34.9
7 2006 222. 30.9 28.0 33.7
8 2007 213. 29.9 27.0 32.6
9 2008 206. 29.0 26.2 31.6
10 2009 199. 28.0 25.3 30.6
# ... with 11 more rows, and 1 more variable: Mortality_rate_neonatal <dbl>
#rename(replace=c(mean_mmr= "Mortality rate,neonatal",
#"mean_nmmr"="Martenal mortality ratio",
#"mean_imr"="Mortality rate,infant",
##"mean_imr_m"="Mortality rate,infant,male",
#"mean_imr_f"="Mortality rate,infant,female")
#),warn_missing = FALSE)
# Mean of every indicator per year and region, shown wide: one row per
# year/region pair and one column per indicator. Rows with an NA in any column
# are dropped first.
df1 %>%
  drop_na() %>%
  group_by(year, indicator, region) %>%
  summarise(mean_value = mean(value)) %>%
  pivot_wider(names_from = indicator, values_from = mean_value)
`summarise()` has grouped output by 'year', 'indicator'. You can override using the `.groups` argument.
# A tibble: 147 x 7
# Groups: year [21]
year region Maternal_mortality~ Mortality_rate_i~ Mortality_rate_inf~
<int> <chr> <dbl> <dbl> <dbl>
1 2000 East Asia & ~ 164. 31.5 28.3
2 2000 Europe & Cen~ 21.2 14.7 13.0
3 2000 Latin Americ~ 110. 24.2 21.7
4 2000 Middle East ~ 86.2 26.2 24.1
5 2000 North America 10.5 6.2 5.6
6 2000 South Asia 462. 58.4 54.7
7 2000 Sub-Saharan ~ 759. 80.9 74.1
8 2001 East Asia & ~ 158. 30.3 27.2
9 2001 Europe & Cen~ 20.2 13.9 12.3
10 2001 Latin Americ~ 108. 23.5 21.0
# ... with 137 more rows, and 2 more variables:
# Mortality_rate_infant_male <dbl>, Mortality_rate_neonatal <dbl>
# Country with the lowest maternal mortality ratio in each year.
# Only one row per year is kept, so countries tied on the minimum are not
# all listed.
df1 %>%
  filter(indicator == "Maternal_mortality_ratio") %>%
  select(year, country, region, value) %>%
  drop_na() %>%
  group_by(year) %>%
  arrange(value) %>%
  slice(1) %>%
  print(n = 21)
# A tibble: 18 x 4
# Groups: year [18]
year country region value
<int> <chr> <chr> <dbl>
1 2000 Greece Europe & Central Asia 3
2 2001 Greece Europe & Central Asia 3
3 2002 Greece Europe & Central Asia 3
4 2003 Greece Europe & Central Asia 3
5 2004 Greece Europe & Central Asia 3
6 2005 Greece Europe & Central Asia 3
7 2006 Italy Europe & Central Asia 2
8 2007 Italy Europe & Central Asia 2
9 2008 Italy Europe & Central Asia 2
10 2009 Italy Europe & Central Asia 2
11 2010 Italy Europe & Central Asia 2
12 2011 Italy Europe & Central Asia 2
13 2012 Italy Europe & Central Asia 2
14 2013 Italy Europe & Central Asia 2
15 2014 Italy Europe & Central Asia 2
16 2015 Italy Europe & Central Asia 2
17 2016 Italy Europe & Central Asia 2
18 2017 Belarus Europe & Central Asia 2
Observations
# Country with the highest maternal mortality ratio in each year.
# Only one row per year is kept, so countries tied on the maximum are not
# all listed.
df1 %>%
  filter(indicator == "Maternal_mortality_ratio") %>%
  select(year, country, region, value) %>%
  drop_na() %>%
  group_by(year) %>%
  arrange(desc(value)) %>%
  slice(1) %>%
  print(n = 21)
# A tibble: 18 x 4
# Groups: year [18]
year country region value
<int> <chr> <chr> <dbl>
1 2000 Sierra Leone Sub-Saharan Africa 2480
2 2001 Sierra Leone Sub-Saharan Africa 2250
3 2002 Sierra Leone Sub-Saharan Africa 2080
4 2003 Sierra Leone Sub-Saharan Africa 1960
5 2004 Sierra Leone Sub-Saharan Africa 1850
6 2005 Sierra Leone Sub-Saharan Africa 1760
7 2006 Sierra Leone Sub-Saharan Africa 1680
8 2007 Sierra Leone Sub-Saharan Africa 1610
9 2008 Sierra Leone Sub-Saharan Africa 1530
10 2009 Sierra Leone Sub-Saharan Africa 1450
11 2010 Sierra Leone Sub-Saharan Africa 1360
12 2011 Sierra Leone Sub-Saharan Africa 1270
13 2012 Sierra Leone Sub-Saharan Africa 1210
14 2013 Sierra Leone Sub-Saharan Africa 1180
15 2014 Sierra Leone Sub-Saharan Africa 1190
16 2015 Sierra Leone Sub-Saharan Africa 1180
17 2016 Chad Sub-Saharan Africa 1140
18 2017 South Sudan Sub-Saharan Africa 1150
Observations
-The country with the highest yearly maternal mortality ratio has consistently been from the Sub-Saharan Africa region (Sierra Leone, Chad, and South Sudan).
# Country with the lowest neonatal mortality rate in each year.
# Only one row per year is kept, so countries tied on the minimum are not
# all listed.
df1 %>%
  filter(indicator == "Mortality_rate_neonatal") %>%
  select(year, country, region, value) %>%
  drop_na() %>%
  group_by(year) %>%
  arrange(value) %>%
  slice(1) %>%
  print(n = 21)
# A tibble: 21 x 4
# Groups: year [21]
year country region value
<int> <chr> <chr> <dbl>
1 2000 Singapore East Asia & Pacific 1.6
2 2001 Singapore East Asia & Pacific 1.5
3 2002 Singapore East Asia & Pacific 1.4
4 2003 Singapore East Asia & Pacific 1.3
5 2004 Singapore East Asia & Pacific 1.3
6 2005 Singapore East Asia & Pacific 1.2
7 2006 Singapore East Asia & Pacific 1.2
8 2007 Singapore East Asia & Pacific 1.2
9 2008 Japan East Asia & Pacific 1.2
10 2009 Japan East Asia & Pacific 1.2
11 2010 Japan East Asia & Pacific 1.1
12 2011 Japan East Asia & Pacific 1
13 2012 Japan East Asia & Pacific 1
14 2013 Japan East Asia & Pacific 1
15 2014 Japan East Asia & Pacific 0.9
16 2015 Japan East Asia & Pacific 0.9
17 2016 Japan East Asia & Pacific 0.9
18 2017 Japan East Asia & Pacific 0.9
19 2018 Japan East Asia & Pacific 0.9
20 2019 San Marino Europe & Central Asia 0.8
21 2020 Japan East Asia & Pacific 0.8
Observations
# Country with the highest neonatal mortality rate in each year.
# Only one row per year is kept, so countries tied on the maximum are not
# all listed.
df1 %>%
  filter(indicator == "Mortality_rate_neonatal") %>%
  select(year, country, region, value) %>%
  drop_na() %>%
  group_by(year) %>%
  arrange(desc(value)) %>%
  slice(1) %>%
  print(n = 21)
# A tibble: 21 x 4
# Groups: year [21]
year country region value
<int> <chr> <chr> <dbl>
1 2000 Afghanistan South Asia 60.9
2 2001 Afghanistan South Asia 59.7
3 2002 Afghanistan South Asia 58.5
4 2003 Afghanistan South Asia 57.2
5 2004 Afghanistan South Asia 55.9
6 2005 Afghanistan South Asia 54.6
7 2006 Afghanistan South Asia 53.2
8 2007 Afghanistan South Asia 51.7
9 2008 Pakistan South Asia 50.9
10 2009 Pakistan South Asia 50.4
11 2010 Pakistan South Asia 49.8
12 2011 Pakistan South Asia 49
13 2012 Pakistan South Asia 48.1
14 2013 Pakistan South Asia 47.1
15 2014 Pakistan South Asia 46.2
16 2015 Pakistan South Asia 45.2
17 2016 Lesotho Sub-Saharan Africa 45.1
18 2017 Lesotho Sub-Saharan Africa 45.2
19 2018 Lesotho Sub-Saharan Africa 45.1
20 2019 Lesotho Sub-Saharan Africa 44.8
21 2020 Lesotho Sub-Saharan Africa 44.3
Observations
# Country with the lowest infant mortality rate in each year.
# Only one row per year is kept, so countries tied on the minimum are not
# all listed.
df1 %>%
  filter(indicator == "Mortality_rate_infant") %>%
  select(year, country, region, value) %>%
  drop_na() %>%
  group_by(year) %>%
  arrange(value) %>%
  slice(1) %>%
  print(n = 21)
# A tibble: 21 x 4
# Groups: year [21]
year country region value
<int> <chr> <chr> <dbl>
1 2000 Singapore East Asia & Pacific 3
2 2001 Singapore East Asia & Pacific 2.8
3 2002 Singapore East Asia & Pacific 2.6
4 2003 Singapore East Asia & Pacific 2.5
5 2004 Singapore East Asia & Pacific 2.4
6 2005 Singapore East Asia & Pacific 2.3
7 2006 Singapore East Asia & Pacific 2.3
8 2007 Singapore East Asia & Pacific 2.3
9 2008 Iceland Europe & Central Asia 2.3
10 2009 Iceland Europe & Central Asia 2.2
11 2010 Iceland Europe & Central Asia 2.2
12 2011 Iceland Europe & Central Asia 2.1
13 2012 Iceland Europe & Central Asia 2
14 2013 Iceland Europe & Central Asia 1.9
15 2014 Iceland Europe & Central Asia 1.9
16 2015 Iceland Europe & Central Asia 1.8
17 2016 Iceland Europe & Central Asia 1.8
18 2017 Iceland Europe & Central Asia 1.7
19 2018 Iceland Europe & Central Asia 1.6
20 2019 Iceland Europe & Central Asia 1.6
21 2020 Iceland Europe & Central Asia 1.5
Observations
# Country with the highest infant mortality rate in each year.
# Only one row per year is kept, so countries tied on the maximum are not
# all listed.
df1 %>%
  filter(indicator == "Mortality_rate_infant") %>%
  select(year, country, region, value) %>%
  drop_na() %>%
  group_by(year) %>%
  arrange(desc(value)) %>%
  slice(1) %>%
  print(n = 21)
# A tibble: 21 x 4
# Groups: year [21]
year country region value
<int> <chr> <chr> <dbl>
1 2000 Sierra Leone Sub-Saharan Africa 138.
2 2001 Sierra Leone Sub-Saharan Africa 136.
3 2002 Sierra Leone Sub-Saharan Africa 133.
4 2003 Sierra Leone Sub-Saharan Africa 130.
5 2004 Sierra Leone Sub-Saharan Africa 127.
6 2005 Sierra Leone Sub-Saharan Africa 124.
7 2006 Sierra Leone Sub-Saharan Africa 121.
8 2007 Sierra Leone Sub-Saharan Africa 118.
9 2008 Sierra Leone Sub-Saharan Africa 114.
10 2009 Sierra Leone Sub-Saharan Africa 111.
11 2010 Sierra Leone Sub-Saharan Africa 107.
12 2011 Sierra Leone Sub-Saharan Africa 104.
13 2012 Sierra Leone Sub-Saharan Africa 101.
14 2013 Sierra Leone Sub-Saharan Africa 97.7
15 2014 Sierra Leone Sub-Saharan Africa 95.6
16 2015 Sierra Leone Sub-Saharan Africa 95.1
17 2016 Sierra Leone Sub-Saharan Africa 89.7
18 2017 Sierra Leone Sub-Saharan Africa 87.3
19 2018 Sierra Leone Sub-Saharan Africa 84.8
20 2019 Sierra Leone Sub-Saharan Africa 82.4
21 2020 Sierra Leone Sub-Saharan Africa 80.1
Observations
# Shared theme overrides for the mortality charts.
#
# Arguments:
#   font_family  Font family applied to all plot text. Defaults to
#                "Comic Sans MS", the value that used to be hard-coded, so
#                existing my_theme() calls are unchanged; pass another family
#                on machines where that font is not installed.
#
# Returns a ggplot2 theme object meant to be added to a plot with `+`, after
# any complete theme whose settings it should override.
my_theme <- function(font_family = "Comic Sans MS") {
  theme(
    text = element_text(family = font_family),
    # Dashed black frame around a light grey panel with white major grid lines.
    panel.border = element_rect(linetype = "dashed", fill = NA, color = "black"),
    panel.background = element_rect(fill = "#f0f0f0"),
    panel.grid.major = element_line(color = "white"),
    plot.title = element_text(size = rel(1)),
    plot.background = element_blank(),
    axis.title = element_text(size = 8),
    # Untitled legend with no background, placed underneath the plot.
    legend.title = element_blank(),
    legend.background = element_blank(),
    legend.position = "bottom",
    legend.text = element_text(size = 8),
    # Smaller subtitle in an orange-red accent colour.
    plot.subtitle = element_text(size = rel(0.8), color = "#f03b20")
  )
}
# Line chart of the mean maternal mortality ratio per year, one line per
# region. my_theme() is added after theme_fivethirtyeight() so that its
# settings take precedence.
ggplot(
  data = filter(region_mortality, indicator == "Maternal_mortality_ratio"),
  mapping = aes(x = year, y = mean_mortality, colour = region)
) +
  geom_line(size = 1) +
  scale_color_colorblind() +
  scale_x_continuous(breaks = seq(2000, 2020, by = 5)) +
  labs(
    x = "Year",
    y = "mean MMR (per 100,000 live births) ",
    title = "Maternal Mortality Ratio (MMR) has been decreasing worldwide ",
    subtitle = "Sub Saharan Africa remains the region with the highest MMR mean"
  ) +
  theme_fivethirtyeight() +
  my_theme()
# Line chart of the mean neonatal mortality rate per year, one line per
# region. my_theme() is added after theme_fivethirtyeight() so that its
# settings take precedence.
ggplot(
  data = filter(region_mortality, indicator == "Mortality_rate_neonatal"),
  mapping = aes(x = year, y = mean_mortality, colour = region)
) +
  geom_line(size = 1) +
  scale_color_colorblind() +
  scale_x_continuous(breaks = seq(2000, 2020, by = 5)) +
  labs(
    x = "Year",
    y = "mean NMR (per 1,000 live births) ",
    title = "Neonatal Mortality Rate (NMR) has been decreasing worldwide ",
    subtitle = "Sub Saharan Africa and South Asia are the regions with the highest NMR means"
  ) +
  theme_fivethirtyeight() +
  my_theme()
Observations
# Line chart of the mean infant mortality rate per year, one line per region.
# my_theme() is added after theme_fivethirtyeight() so that its settings take
# precedence.
ggplot(
  data = filter(region_mortality, indicator == "Mortality_rate_infant"),
  mapping = aes(x = year, y = mean_mortality, colour = region)
) +
  geom_line(size = 1) +
  scale_color_colorblind() +
  scale_x_continuous(breaks = seq(2000, 2020, by = 5)) +
  labs(
    x = "Year",
    y = "mean IMR (per 1,000 live births) ",
    title = "Infant Mortality Rate (IMR) has been decreasing worldwide ",
    subtitle = "Sub Saharan Africa remains the region with the highest IMR mean"
  ) +
  theme_fivethirtyeight() +
  my_theme()
Observations
# Names of the two sex-specific infant mortality indicators.
imr_sex <- c("Mortality_rate_infant_male", "Mortality_rate_infant_female")
# Mean infant mortality rate per year and region, drawn as one panel per
# sex-specific indicator with one line per region.
ggplot(
  data = filter(region_mortality, indicator %in% imr_sex),
  mapping = aes(x = year, y = mean_mortality, colour = region)
) +
  geom_line(size = 1) +
  facet_wrap(vars(indicator)) +
  scale_color_colorblind() +
  scale_x_continuous(breaks = seq(2000, 2020, by = 5)) +
  labs(
    x = "Year",
    y = "mean IMR (per 1,000 live births) ",
    title = "Mean Infant Mortality Rate ",
    subtitle = "Per Sex and Region"
  ) +
  theme_fivethirtyeight() +
  my_theme()
# Yearly mean infant mortality rate for the male and female indicators, drawn
# as points joined by lines and coloured by indicator.
ggplot(
  data = filter(world_mortality, indicator %in% imr_sex),
  mapping = aes(x = year, y = mean_mortality, colour = indicator)
) +
  geom_point() +
  geom_line() +
  scale_color_colorblind() +
  scale_x_continuous(breaks = seq(2000, 2020, by = 5)) +
  labs(
    x = "Year",
    y = "mean IMR (per 1,000 live births) ",
    title = "Infant Mortality Rate (IMR) has been decreasing worldwide ",
    subtitle = "Male infants had a higher IMR compared to female"
  ) +
  theme_fivethirtyeight() +
  my_theme()
Takeaways