This is a dataset from Unicef with mortality rates under 5 years across different countries from 1950 to 2015. Data Reference: https://raw.githubusercontent.com/baruab/msdsrepo/main/DATA-607/unicef-u5mr.csv
library(dplyr)
library(downloader)
library(stringr)
library(htmlTable)
library(tidyverse)
library(reshape2)
# Import the data from the csv file
unicef_data <- read.csv("https://raw.githubusercontent.com/baruab/msdsrepo/main/DATA-607/unicef-u5mr.csv", stringsAsFactors = FALSE)
head(unicef_data, 5)
## CountryName U5MR.1950 U5MR.1951 U5MR.1952 U5MR.1953 U5MR.1954 U5MR.1955
## 1 Afghanistan NA NA NA NA NA NA
## 2 Albania NA NA NA NA NA NA
## 3 Algeria NA NA NA NA 251 249.9
## 4 Andorra NA NA NA NA NA NA
## 5 Angola NA NA NA NA NA NA
## U5MR.1956 U5MR.1957 U5MR.1958 U5MR.1959 U5MR.1960 U5MR.1961 U5MR.1962
## 1 NA NA NA NA NA 356.5 350.6
## 2 NA NA NA NA NA NA NA
## 3 249 248 247.5 246.7 246.3 246.1 246.2
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## U5MR.1963 U5MR.1964 U5MR.1965 U5MR.1966 U5MR.1967 U5MR.1968 U5MR.1969
## 1 345.0 339.7 334.1 328.7 323.3 318.1 313.0
## 2 NA NA NA NA NA NA NA
## 3 246.8 247.4 248.2 248.7 248.4 247.4 245.3
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## U5MR.1970 U5MR.1971 U5MR.1972 U5MR.1973 U5MR.1974 U5MR.1975 U5MR.1976
## 1 307.8 302.1 296.4 290.8 284.9 279.4 273.6
## 2 NA NA NA NA NA NA NA
## 3 241.7 236.5 230.0 222.5 214.2 205.0 195.2
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## U5MR.1977 U5MR.1978 U5MR.1979 U5MR.1980 U5MR.1981 U5MR.1982 U5MR.1983
## 1 267.8 261.6 255.5 249.1 242.7 236.2 229.7
## 2 NA 91.1 84.7 78.6 73.0 67.8 62.8
## 3 184.9 173.8 161.8 148.1 132.5 115.8 99.2
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA 234.1 232.8 231.5 230.2
## U5MR.1984 U5MR.1985 U5MR.1986 U5MR.1987 U5MR.1988 U5MR.1989 U5MR.1990
## 1 222.9 216.0 209.2 202.1 195.0 187.8 181.0
## 2 58.3 54.3 50.7 47.6 44.9 42.5 40.6
## 3 83.8 71.2 61.9 55.4 51.2 48.5 46.8
## 4 NA NA NA NA NA NA 8.5
## 5 229.1 228.3 227.5 226.9 226.5 226.2 226.0
## U5MR.1991 U5MR.1992 U5MR.1993 U5MR.1994 U5MR.1995 U5MR.1996 U5MR.1997
## 1 174.2 167.8 162.0 156.8 152.3 148.6 145.5
## 2 38.8 37.3 36.0 34.6 33.2 31.8 30.3
## 3 45.7 44.9 44.1 43.3 42.5 41.8 41.1
## 4 7.9 7.4 6.9 6.4 6.0 5.7 5.3
## 5 225.9 226.0 225.8 225.5 224.8 224.0 222.6
## U5MR.1998 U5MR.1999 U5MR.2000 U5MR.2001 U5MR.2002 U5MR.2003 U5MR.2004
## 1 142.6 139.9 137.0 133.8 130.3 126.8 123.2
## 2 28.9 27.5 26.2 24.9 23.6 22.5 21.5
## 3 40.6 40.2 39.7 38.9 37.8 36.5 35.1
## 4 5.0 4.8 4.6 4.4 4.2 4.1 4.0
## 5 220.8 218.9 216.7 214.1 211.7 209.2 206.7
## U5MR.2005 U5MR.2006 U5MR.2007 U5MR.2008 U5MR.2009 U5MR.2010 U5MR.2011
## 1 119.6 116.3 113.2 110.4 107.6 105.0 102.3
## 2 20.5 19.5 18.7 17.9 17.3 16.6 16.0
## 3 33.6 32.1 30.7 29.4 28.3 27.3 26.6
## 4 3.9 3.7 3.6 3.5 3.4 3.3 3.2
## 5 203.9 200.5 196.4 192.0 187.3 182.5 177.3
## U5MR.2012 U5MR.2013 U5MR.2014 U5MR.2015
## 1 99.5 96.7 93.9 91.1
## 2 15.5 14.9 14.4 14.0
## 3 26.1 25.8 25.6 25.5
## 4 3.1 3.0 2.9 2.8
## 5 172.2 167.1 162.2 156.9
names(unicef_data) <- gsub("U5MR.", "", names(unicef_data))
subset_unicef_data <- select( unicef_data, -1)
# Assign row averages to new variable named row_mean
subset_unicef_data$row_mean <- round(rowMeans(subset_unicef_data, na.rm=TRUE),2)
# Add the first column back to the dataframe
bind_unicef_data <- cbind(select(unicef_data, 1), subset_unicef_data)
# Sort the dataframe by the mean of the mortality rate
highest_child_mortality <- arrange(bind_unicef_data, desc(row_mean)) %>% head(5)
# Display the countries with highest rates
htmlTable( select( highest_child_mortality, c('CountryName', 'row_mean')))
| CountryName | row_mean | |
|---|---|---|
| 1 | Mali | 273.7 |
| 2 | Sierra Leone | 267.95 |
| 3 | Niger | 262.15 |
| 4 | Burkina Faso | 247.16 |
| 5 | Guinea | 235.47 |
subset_child_mortality <- select( highest_child_mortality, c('CountryName', '2011','2012','2013', '2014','2015'))
subset_child_mortality1 <- subset_child_mortality %>%
gather("year", "cases", "2011":"2012":"2013":"2014":"2015")
# Plot the country line graph by the year to identify trend
ggplot(subset_child_mortality1, aes(CountryName,cases, group=year)) +
geom_line(aes(color=year))+
geom_point(aes(shape=year))
The mortality rates consistently decreased from 2011 - 2015 for all the countries.
mean_child_mortality <- subset_child_mortality1 %>% group_by(CountryName) %>% summarise(avg_deaths = mean(cases, na.rm=TRUE))
sd_child_mortality <- subset_child_mortality1 %>% group_by(CountryName) %>% summarise(sd_deaths = sd(cases, na.rm=TRUE))
min_child_mortality <- subset_child_mortality1 %>% group_by(CountryName) %>% summarise(min_deaths = min(cases, na.rm=TRUE))
max_child_mortality <- subset_child_mortality1 %>% group_by(CountryName) %>% summarise(max_deaths = max(cases, na.rm=TRUE))
variance_child_mortality <- subset_child_mortality1 %>% group_by(CountryName) %>% summarise(variance_deaths = var(cases, na.rm=TRUE))
child_mortality_stats <- mean_child_mortality %>% inner_join(sd_child_mortality, by = 'CountryName') %>% inner_join(min_child_mortality, by = 'CountryName') %>% inner_join(max_child_mortality, by = 'CountryName') %>% inner_join(variance_child_mortality, by = 'CountryName')
arrange(child_mortality_stats, desc(avg_deaths))
## # A tibble: 5 x 6
## CountryName avg_deaths sd_deaths min_deaths max_deaths variance_deaths
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Sierra Leone 134. 12.0 120. 151. 144.
## 2 Mali 123. 6.76 115. 132. 45.7
## 3 Niger 105. 8.13 95.5 116. 66.1
## 4 Guinea 101. 5.54 93.7 108. 30.6
## 5 Burkina Faso 97.2 7.23 88.6 107. 52.3
The trend of different statistical values is consistent across countries.
child_mortality_stats1 <- child_mortality_stats
library(reshape2)
#melt data frame into long format
df <- melt(child_mortality_stats1 , id.vars = 'CountryName')
# Everything on the same plot
ggplot(df, aes(CountryName,value, col=variable)) +
geom_line(aes(color=variable))+
geom_point()
# Import the data from the csv file
continent_data <- read.csv("https://pkgstore.datahub.io/JohnSnowLabs/country-and-continent-codes-list/country-and-continent-codes-list-csv_csv/data/b7876b7f496677669644f3d1069d3121/country-and-continent-codes-list-csv_csv.csv", stringsAsFactors = FALSE)
head(continent_data, 5)
## Continent_Name Continent_Code Country_Name
## 1 Asia AS Afghanistan, Islamic Republic of
## 2 Europe EU Albania, Republic of
## 3 Antarctica AN Antarctica (the territory South of 60 deg S)
## 4 Africa AF Algeria, People's Democratic Republic of
## 5 Oceania OC American Samoa
## Two_Letter_Country_Code Three_Letter_Country_Code Country_Number
## 1 AF AFG 4
## 2 AL ALB 8
## 3 AQ ATA 10
## 4 DZ DZA 12
## 5 AS ASM 16
continent_data$Country_Name <- gsub(",.*$", "", continent_data$Country_Name)
continent_data = continent_data %>% dplyr::mutate(CountryName = gsub(",.*$", "", continent_data$Country_Name))
joined_df <- left_join( continent_data,bind_unicef_data, by = "CountryName")
sum_cont_df <- joined_df %>% select( Continent_Name, Continent_Code, row_mean) %>% group_by( Continent_Code) %>% mutate(sum_mean = sum(row_mean, na.rm=TRUE)) %>% summarize(sum_mean = max(sum_mean)) %>% arrange(desc(sum_mean))
htmlTable(sum_cont_df)
| Continent_Code | sum_mean | |
|---|---|---|
| 1 | AF | 7965.11 |
| 2 | AS | 3363.82 |
| 3 | 1093.4 | |
| 4 | EU | 1046.75 |
| 5 | SA | 816.34 |
| 6 | OC | 750.77 |
| 7 | AN | 0 |
top_cont_country_df <- inner_join( continent_data,subset_child_mortality, by = "CountryName")
top_cont_country_df <- top_cont_country_df %>% select( Continent_Name, Continent_Code, CountryName)
htmlTable(top_cont_country_df)
| Continent_Name | Continent_Code | CountryName | |
|---|---|---|---|
| 1 | Africa | AF | Guinea |
| 2 | Africa | AF | Mali |
| 3 | Africa | AF | Niger |
| 4 | Africa | AF | Sierra Leone |
| 5 | Africa | AF | Burkina Faso |
This is to concluded the Africa has the highest cases of child mortality, with all the countries with high cases are in Africa.