library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(stringr)
Data 1: UNICEF under-five child mortality (source: https://sejdemyr.github.io/r-tutorials/basics/data/unicef-u5mr.csv)
• Create a .CSV file (or optionally, a MySQL database!) that includes all of the information included in the dataset. You're encouraged to use a "wide" structure similar to how the information appears in the discussion item, so that you can practice tidying and transformations as described below. • Read the information from your .CSV file into R, and use tidyr and dplyr as needed to tidy and transform your data. [Most of your grade will be based on this step!]
# Load the wide-format UNICEF table (one row per country, one U5MR.<year>
# column per year) from the working directory.
dt <- read.csv(file = "unicef-u5mr.csv", header = TRUE)
View the first few rows
# Preview the first six rows of the raw wide table.
head(dt, n = 6L)
## CountryName U5MR.1950 U5MR.1951 U5MR.1952 U5MR.1953 U5MR.1954
## 1 Afghanistan NA NA NA NA NA
## 2 Albania NA NA NA NA NA
## 3 Algeria NA NA NA NA 251
## 4 Andorra NA NA NA NA NA
## 5 Angola NA NA NA NA NA
## 6 Antigua & Barbuda NA NA NA NA NA
## U5MR.1955 U5MR.1956 U5MR.1957 U5MR.1958 U5MR.1959 U5MR.1960 U5MR.1961
## 1 NA NA NA NA NA NA 356.5
## 2 NA NA NA NA NA NA NA
## 3 249.9 249 248 247.5 246.7 246.3 246.1
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA
## U5MR.1962 U5MR.1963 U5MR.1964 U5MR.1965 U5MR.1966 U5MR.1967 U5MR.1968
## 1 350.6 345.0 339.7 334.1 328.7 323.3 318.1
## 2 NA NA NA NA NA NA NA
## 3 246.2 246.8 247.4 248.2 248.7 248.4 247.4
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA
## U5MR.1969 U5MR.1970 U5MR.1971 U5MR.1972 U5MR.1973 U5MR.1974 U5MR.1975
## 1 313.0 307.8 302.1 296.4 290.8 284.9 279.4
## 2 NA NA NA NA NA NA NA
## 3 245.3 241.7 236.5 230.0 222.5 214.2 205.0
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA
## U5MR.1976 U5MR.1977 U5MR.1978 U5MR.1979 U5MR.1980 U5MR.1981 U5MR.1982
## 1 273.6 267.8 261.6 255.5 249.1 242.7 236.2
## 2 NA NA 91.1 84.7 78.6 73.0 67.8
## 3 195.2 184.9 173.8 161.8 148.1 132.5 115.8
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA 234.1 232.8 231.5
## 6 NA NA NA NA NA NA NA
## U5MR.1983 U5MR.1984 U5MR.1985 U5MR.1986 U5MR.1987 U5MR.1988 U5MR.1989
## 1 229.7 222.9 216.0 209.2 202.1 195.0 187.8
## 2 62.8 58.3 54.3 50.7 47.6 44.9 42.5
## 3 99.2 83.8 71.2 61.9 55.4 51.2 48.5
## 4 NA NA NA NA NA NA NA
## 5 230.2 229.1 228.3 227.5 226.9 226.5 226.2
## 6 NA NA NA NA NA NA NA
## U5MR.1990 U5MR.1991 U5MR.1992 U5MR.1993 U5MR.1994 U5MR.1995 U5MR.1996
## 1 181.0 174.2 167.8 162.0 156.8 152.3 148.6
## 2 40.6 38.8 37.3 36.0 34.6 33.2 31.8
## 3 46.8 45.7 44.9 44.1 43.3 42.5 41.8
## 4 8.5 7.9 7.4 6.9 6.4 6.0 5.7
## 5 226.0 225.9 226.0 225.8 225.5 224.8 224.0
## 6 25.5 24.2 23.1 21.9 20.8 19.7 18.8
## U5MR.1997 U5MR.1998 U5MR.1999 U5MR.2000 U5MR.2001 U5MR.2002 U5MR.2003
## 1 145.5 142.6 139.9 137.0 133.8 130.3 126.8
## 2 30.3 28.9 27.5 26.2 24.9 23.6 22.5
## 3 41.1 40.6 40.2 39.7 38.9 37.8 36.5
## 4 5.3 5.0 4.8 4.6 4.4 4.2 4.1
## 5 222.6 220.8 218.9 216.7 214.1 211.7 209.2
## 6 17.9 17.0 16.2 15.5 14.8 14.1 13.5
## U5MR.2004 U5MR.2005 U5MR.2006 U5MR.2007 U5MR.2008 U5MR.2009 U5MR.2010
## 1 123.2 119.6 116.3 113.2 110.4 107.6 105.0
## 2 21.5 20.5 19.5 18.7 17.9 17.3 16.6
## 3 35.1 33.6 32.1 30.7 29.4 28.3 27.3
## 4 4.0 3.9 3.7 3.6 3.5 3.4 3.3
## 5 206.7 203.9 200.5 196.4 192.0 187.3 182.5
## 6 12.9 12.4 11.8 11.3 10.9 10.4 9.9
## U5MR.2011 U5MR.2012 U5MR.2013 U5MR.2014 U5MR.2015
## 1 102.3 99.5 96.7 93.9 91.1
## 2 16.0 15.5 14.9 14.4 14.0
## 3 26.6 26.1 25.8 25.6 25.5
## 4 3.2 3.1 3.0 2.9 2.8
## 5 177.3 172.2 167.1 162.2 156.9
## 6 9.5 9.1 8.7 8.4 8.1
View column names
# List the column names: CountryName followed by one U5MR.<year> column per year.
colnames(dt)
## [1] "CountryName" "U5MR.1950" "U5MR.1951" "U5MR.1952" "U5MR.1953"
## [6] "U5MR.1954" "U5MR.1955" "U5MR.1956" "U5MR.1957" "U5MR.1958"
## [11] "U5MR.1959" "U5MR.1960" "U5MR.1961" "U5MR.1962" "U5MR.1963"
## [16] "U5MR.1964" "U5MR.1965" "U5MR.1966" "U5MR.1967" "U5MR.1968"
## [21] "U5MR.1969" "U5MR.1970" "U5MR.1971" "U5MR.1972" "U5MR.1973"
## [26] "U5MR.1974" "U5MR.1975" "U5MR.1976" "U5MR.1977" "U5MR.1978"
## [31] "U5MR.1979" "U5MR.1980" "U5MR.1981" "U5MR.1982" "U5MR.1983"
## [36] "U5MR.1984" "U5MR.1985" "U5MR.1986" "U5MR.1987" "U5MR.1988"
## [41] "U5MR.1989" "U5MR.1990" "U5MR.1991" "U5MR.1992" "U5MR.1993"
## [46] "U5MR.1994" "U5MR.1995" "U5MR.1996" "U5MR.1997" "U5MR.1998"
## [51] "U5MR.1999" "U5MR.2000" "U5MR.2001" "U5MR.2002" "U5MR.2003"
## [56] "U5MR.2004" "U5MR.2005" "U5MR.2006" "U5MR.2007" "U5MR.2008"
## [61] "U5MR.2009" "U5MR.2010" "U5MR.2011" "U5MR.2012" "U5MR.2013"
## [66] "U5MR.2014" "U5MR.2015"
View data types
# Report the (primary) class of every column as a named character vector.
# vapply() guarantees a character result; sapply() would silently return a
# list if any column carried more than one class.
vapply(dt, function(column) class(column)[1L], character(1))
## CountryName U5MR.1950 U5MR.1951 U5MR.1952 U5MR.1953 U5MR.1954
## "factor" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1955 U5MR.1956 U5MR.1957 U5MR.1958 U5MR.1959 U5MR.1960
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1961 U5MR.1962 U5MR.1963 U5MR.1964 U5MR.1965 U5MR.1966
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1967 U5MR.1968 U5MR.1969 U5MR.1970 U5MR.1971 U5MR.1972
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1973 U5MR.1974 U5MR.1975 U5MR.1976 U5MR.1977 U5MR.1978
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1979 U5MR.1980 U5MR.1981 U5MR.1982 U5MR.1983 U5MR.1984
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1985 U5MR.1986 U5MR.1987 U5MR.1988 U5MR.1989 U5MR.1990
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1991 U5MR.1992 U5MR.1993 U5MR.1994 U5MR.1995 U5MR.1996
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1997 U5MR.1998 U5MR.1999 U5MR.2000 U5MR.2001 U5MR.2002
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.2003 U5MR.2004 U5MR.2005 U5MR.2006 U5MR.2007 U5MR.2008
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.2009 U5MR.2010 U5MR.2011 U5MR.2012 U5MR.2013 U5MR.2014
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.2015
## "numeric"
Transform wide to long format
# Reshape wide -> long: every U5MR.<year> column becomes a (Year, Value) pair,
# with CountryName kept as the identifier column.
dt_tidy <- dt %>%
  gather(key = "Year", value = "Value", -CountryName)

# Preview the first six rows of the long table.
head(dt_tidy, n = 6L)
## CountryName Year Value
## 1 Afghanistan U5MR.1950 NA
## 2 Albania U5MR.1950 NA
## 3 Algeria U5MR.1950 NA
## 4 Andorra U5MR.1950 NA
## 5 Angola U5MR.1950 NA
## 6 Antigua & Barbuda U5MR.1950 NA
Extract Year and convert to numeric
# Keep only the trailing digits of the former column name (e.g. "U5MR.1950"
# -> "1950") and store the year as a number.
dt_tidy <- dt_tidy %>%
  mutate(Year = as.numeric(str_extract(Year, "\\d+$")))
• Perform the analysis requested in the discussion item.
Average Child Mortality over the years
# Plot the cross-country mean under-five mortality for each year.
# na.rm = TRUE is required because many country-years are NA (see the head()
# output of the raw table); each yearly mean therefore covers only the
# countries that have a value for that year.
dt_tidy %>%
  group_by(Year) %>%
  summarise(avg = mean(Value, na.rm = TRUE)) %>%
  ggplot(aes(Year, avg)) +
  geom_line() +
  theme_classic() +
  # Replace the default axis label ("avg") with a descriptive one.
  labs(y = "Average U5MR") +
  ggtitle("Average Child Mortality Over The Years")
From the graph we can see that average child mortality has decreased over the years.
Top 6 countries with the highest child mortality
# Restrict to the most recent year in the data, sort from highest to lowest
# mortality, and show the first six rows.
dt_tidy %>%
  filter(Year == max(Year)) %>%
  arrange(desc(Value)) %>%
  head(n = 6L)
## CountryName Year Value
## 1 Angola 2015 156.9
## 2 Chad 2015 138.7
## 3 Somalia 2015 136.8
## 4 Central African Republic 2015 130.1
## 5 Sierra Leone 2015 120.4
## 6 Mali 2015 114.7
Angola, Chad, Somalia, Central African Republic, Sierra Leone, and Mali are the countries with the highest child mortality in 2015.
Top 6 countries with the lowest child mortality
# Restrict to the most recent year in the data, sort from lowest to highest
# mortality, and show the first six rows.
dt_tidy %>%
  filter(Year == max(Year)) %>%
  arrange(Value) %>%
  head(n = 6L)
## CountryName Year Value
## 1 Luxembourg 2015 1.9
## 2 Iceland 2015 2.0
## 3 Finland 2015 2.3
## 4 Norway 2015 2.6
## 5 Slovenia 2015 2.6
## 6 Cyprus 2015 2.7
Luxembourg, Iceland, Finland, Norway, Slovenia, and Cyprus are the countries with the lowest child mortality in 2015.
Conclusion: From the analysis we can say that child mortality has decreased gradually over the years. African countries have the highest child mortality and developed countries have the lowest. • Your code should be in an R Markdown file, posted to rpubs.com, and should include narrative descriptions of your data cleanup work, analysis, and conclusions.