library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
library(stringr)
# Load the UNICEF under-five mortality table straight from GitHub.
# The file is wide: one row per country, one U5MR.<year> column per year.
dataset1 <- read.csv(
  file = "https://raw.githubusercontent.com/Zchen116/assignment-2/master/unicef-u5mr.csv"
)
# Preview the first six rows to confirm the import worked.
head(dataset1, n = 6L)
## CountryName U5MR.1950 U5MR.1951 U5MR.1952 U5MR.1953 U5MR.1954
## 1 Afghanistan NA NA NA NA NA
## 2 Albania NA NA NA NA NA
## 3 Algeria NA NA NA NA 251
## 4 Andorra NA NA NA NA NA
## 5 Angola NA NA NA NA NA
## 6 Antigua & Barbuda NA NA NA NA NA
## U5MR.1955 U5MR.1956 U5MR.1957 U5MR.1958 U5MR.1959 U5MR.1960 U5MR.1961
## 1 NA NA NA NA NA NA 356.5
## 2 NA NA NA NA NA NA NA
## 3 249.9 249 248 247.5 246.7 246.3 246.1
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA
## U5MR.1962 U5MR.1963 U5MR.1964 U5MR.1965 U5MR.1966 U5MR.1967 U5MR.1968
## 1 350.6 345.0 339.7 334.1 328.7 323.3 318.1
## 2 NA NA NA NA NA NA NA
## 3 246.2 246.8 247.4 248.2 248.7 248.4 247.4
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA
## U5MR.1969 U5MR.1970 U5MR.1971 U5MR.1972 U5MR.1973 U5MR.1974 U5MR.1975
## 1 313.0 307.8 302.1 296.4 290.8 284.9 279.4
## 2 NA NA NA NA NA NA NA
## 3 245.3 241.7 236.5 230.0 222.5 214.2 205.0
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA
## U5MR.1976 U5MR.1977 U5MR.1978 U5MR.1979 U5MR.1980 U5MR.1981 U5MR.1982
## 1 273.6 267.8 261.6 255.5 249.1 242.7 236.2
## 2 NA NA 91.1 84.7 78.6 73.0 67.8
## 3 195.2 184.9 173.8 161.8 148.1 132.5 115.8
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA 234.1 232.8 231.5
## 6 NA NA NA NA NA NA NA
## U5MR.1983 U5MR.1984 U5MR.1985 U5MR.1986 U5MR.1987 U5MR.1988 U5MR.1989
## 1 229.7 222.9 216.0 209.2 202.1 195.0 187.8
## 2 62.8 58.3 54.3 50.7 47.6 44.9 42.5
## 3 99.2 83.8 71.2 61.9 55.4 51.2 48.5
## 4 NA NA NA NA NA NA NA
## 5 230.2 229.1 228.3 227.5 226.9 226.5 226.2
## 6 NA NA NA NA NA NA NA
## U5MR.1990 U5MR.1991 U5MR.1992 U5MR.1993 U5MR.1994 U5MR.1995 U5MR.1996
## 1 181.0 174.2 167.8 162.0 156.8 152.3 148.6
## 2 40.6 38.8 37.3 36.0 34.6 33.2 31.8
## 3 46.8 45.7 44.9 44.1 43.3 42.5 41.8
## 4 8.5 7.9 7.4 6.9 6.4 6.0 5.7
## 5 226.0 225.9 226.0 225.8 225.5 224.8 224.0
## 6 25.5 24.2 23.1 21.9 20.8 19.7 18.8
## U5MR.1997 U5MR.1998 U5MR.1999 U5MR.2000 U5MR.2001 U5MR.2002 U5MR.2003
## 1 145.5 142.6 139.9 137.0 133.8 130.3 126.8
## 2 30.3 28.9 27.5 26.2 24.9 23.6 22.5
## 3 41.1 40.6 40.2 39.7 38.9 37.8 36.5
## 4 5.3 5.0 4.8 4.6 4.4 4.2 4.1
## 5 222.6 220.8 218.9 216.7 214.1 211.7 209.2
## 6 17.9 17.0 16.2 15.5 14.8 14.1 13.5
## U5MR.2004 U5MR.2005 U5MR.2006 U5MR.2007 U5MR.2008 U5MR.2009 U5MR.2010
## 1 123.2 119.6 116.3 113.2 110.4 107.6 105.0
## 2 21.5 20.5 19.5 18.7 17.9 17.3 16.6
## 3 35.1 33.6 32.1 30.7 29.4 28.3 27.3
## 4 4.0 3.9 3.7 3.6 3.5 3.4 3.3
## 5 206.7 203.9 200.5 196.4 192.0 187.3 182.5
## 6 12.9 12.4 11.8 11.3 10.9 10.4 9.9
## U5MR.2011 U5MR.2012 U5MR.2013 U5MR.2014 U5MR.2015
## 1 102.3 99.5 96.7 93.9 91.1
## 2 16.0 15.5 14.9 14.4 14.0
## 3 26.6 26.1 25.8 25.6 25.5
## 4 3.2 3.1 3.0 2.9 2.8
## 5 177.3 172.2 167.1 162.2 156.9
## 6 9.5 9.1 8.7 8.4 8.1
# List every column name (the country column plus one column per year).
colnames(dataset1)
## [1] "CountryName" "U5MR.1950" "U5MR.1951" "U5MR.1952" "U5MR.1953"
## [6] "U5MR.1954" "U5MR.1955" "U5MR.1956" "U5MR.1957" "U5MR.1958"
## [11] "U5MR.1959" "U5MR.1960" "U5MR.1961" "U5MR.1962" "U5MR.1963"
## [16] "U5MR.1964" "U5MR.1965" "U5MR.1966" "U5MR.1967" "U5MR.1968"
## [21] "U5MR.1969" "U5MR.1970" "U5MR.1971" "U5MR.1972" "U5MR.1973"
## [26] "U5MR.1974" "U5MR.1975" "U5MR.1976" "U5MR.1977" "U5MR.1978"
## [31] "U5MR.1979" "U5MR.1980" "U5MR.1981" "U5MR.1982" "U5MR.1983"
## [36] "U5MR.1984" "U5MR.1985" "U5MR.1986" "U5MR.1987" "U5MR.1988"
## [41] "U5MR.1989" "U5MR.1990" "U5MR.1991" "U5MR.1992" "U5MR.1993"
## [46] "U5MR.1994" "U5MR.1995" "U5MR.1996" "U5MR.1997" "U5MR.1998"
## [51] "U5MR.1999" "U5MR.2000" "U5MR.2001" "U5MR.2002" "U5MR.2003"
## [56] "U5MR.2004" "U5MR.2005" "U5MR.2006" "U5MR.2007" "U5MR.2008"
## [61] "U5MR.2009" "U5MR.2010" "U5MR.2011" "U5MR.2012" "U5MR.2013"
## [66] "U5MR.2014" "U5MR.2015"
# Report the primary class of each column as a named character vector.
# vapply() guarantees a character result of length one per column, whereas
# sapply() silently switches to a list when a column has several classes
# (or when the data frame has no columns).
vapply(dataset1, function(column) class(column)[1L], character(1))
## CountryName U5MR.1950 U5MR.1951 U5MR.1952 U5MR.1953 U5MR.1954
## "factor" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1955 U5MR.1956 U5MR.1957 U5MR.1958 U5MR.1959 U5MR.1960
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1961 U5MR.1962 U5MR.1963 U5MR.1964 U5MR.1965 U5MR.1966
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1967 U5MR.1968 U5MR.1969 U5MR.1970 U5MR.1971 U5MR.1972
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1973 U5MR.1974 U5MR.1975 U5MR.1976 U5MR.1977 U5MR.1978
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1979 U5MR.1980 U5MR.1981 U5MR.1982 U5MR.1983 U5MR.1984
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1985 U5MR.1986 U5MR.1987 U5MR.1988 U5MR.1989 U5MR.1990
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1991 U5MR.1992 U5MR.1993 U5MR.1994 U5MR.1995 U5MR.1996
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.1997 U5MR.1998 U5MR.1999 U5MR.2000 U5MR.2001 U5MR.2002
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.2003 U5MR.2004 U5MR.2005 U5MR.2006 U5MR.2007 U5MR.2008
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.2009 U5MR.2010 U5MR.2011 U5MR.2012 U5MR.2013 U5MR.2014
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## U5MR.2015
## "numeric"
# Reshape from wide to long: every U5MR.<year> column becomes a row, with
# the former column name stored in `Year` and the cell content in `Value`.
# CountryName is excluded from the gathering and kept as the identifier.
dataset1_tidy <- gather(
  dataset1,
  key = "Year",
  value = "Value",
  -CountryName
)
# Preview the first six rows of the long table.
head(dataset1_tidy, n = 6L)
## CountryName Year Value
## 1 Afghanistan U5MR.1950 NA
## 2 Albania U5MR.1950 NA
## 3 Algeria U5MR.1950 NA
## 4 Andorra U5MR.1950 NA
## 5 Angola U5MR.1950 NA
## 6 Antigua & Barbuda U5MR.1950 NA
# Keep only the trailing digits of each label (e.g. "U5MR.1950" -> "1950")
# and store the result as a number so years can be compared and plotted.
dataset1_tidy$Year <- as.numeric(
  str_extract(dataset1_tidy$Year, "\\d+$")
)
# Average the reported values across countries for each year (missing
# values are ignored) and draw the yearly averages as a line chart.
dataset1_tidy %>%
  group_by(Year) %>%
  summarise(avg = mean(Value, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = Year, y = avg)) +
  geom_line() +
  theme_classic() +
  ggtitle("Under-Five Child Mortality Over The Years")
# Restrict to the earliest year present, sort ascending by value, and show
# the six rows with the smallest values.
dataset1_tidy %>%
  filter(Year == min(Year)) %>%
  arrange(Value) %>%
  head(n = 6L)
## CountryName Year Value
## 1 Sweden 1950 27.1
## 2 Australia 1950 31.6
## 3 Netherlands 1950 31.9
## 4 Norway 1950 32.8
## 5 Denmark 1950 34.1
## 6 New Zealand 1950 35.5
# Restrict to the earliest year present, sort descending by value, and show
# the six rows with the largest values.
dataset1_tidy %>%
  filter(Year == min(Year)) %>%
  arrange(desc(Value)) %>%
  head(n = 6L)
## CountryName Year Value
## 1 Burkina Faso 1950 389.7
## 2 Iraq 1950 364.3
## 3 Benin 1950 348.2
## 4 Senegal 1950 346.2
## 5 Togo 1950 324.4
## 6 Mauritania 1950 316.4