library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(stringr)

Data 1: UNICEF under-five child mortality (source: https://sejdemyr.github.io/r-tutorials/basics/data/unicef-u5mr.csv)

 Create a .CSV file (or optionally, a MySQL database!) that includes all of the information included in the dataset. You’re encouraged to use a “wide” structure similar to how the information appears in the discussion item, so that you can practice tidying and transformations as described below. Read the information from your .CSV file into R, and use tidyr and dplyr as needed to tidy and transform your data. [Most of your grade will be based on this step!]

# Load the wide UNICEF table: one row per country and one U5MR.<year>
# column per year.
dt <- read.csv(file = "unicef-u5mr.csv", header = TRUE)

View the first few rows

# Preview the first six rows of the wide table.
head(dt, n = 6L)
##         CountryName U5MR.1950 U5MR.1951 U5MR.1952 U5MR.1953 U5MR.1954
## 1       Afghanistan        NA        NA        NA        NA        NA
## 2           Albania        NA        NA        NA        NA        NA
## 3           Algeria        NA        NA        NA        NA       251
## 4           Andorra        NA        NA        NA        NA        NA
## 5            Angola        NA        NA        NA        NA        NA
## 6 Antigua & Barbuda        NA        NA        NA        NA        NA
##   U5MR.1955 U5MR.1956 U5MR.1957 U5MR.1958 U5MR.1959 U5MR.1960 U5MR.1961
## 1        NA        NA        NA        NA        NA        NA     356.5
## 2        NA        NA        NA        NA        NA        NA        NA
## 3     249.9       249       248     247.5     246.7     246.3     246.1
## 4        NA        NA        NA        NA        NA        NA        NA
## 5        NA        NA        NA        NA        NA        NA        NA
## 6        NA        NA        NA        NA        NA        NA        NA
##   U5MR.1962 U5MR.1963 U5MR.1964 U5MR.1965 U5MR.1966 U5MR.1967 U5MR.1968
## 1     350.6     345.0     339.7     334.1     328.7     323.3     318.1
## 2        NA        NA        NA        NA        NA        NA        NA
## 3     246.2     246.8     247.4     248.2     248.7     248.4     247.4
## 4        NA        NA        NA        NA        NA        NA        NA
## 5        NA        NA        NA        NA        NA        NA        NA
## 6        NA        NA        NA        NA        NA        NA        NA
##   U5MR.1969 U5MR.1970 U5MR.1971 U5MR.1972 U5MR.1973 U5MR.1974 U5MR.1975
## 1     313.0     307.8     302.1     296.4     290.8     284.9     279.4
## 2        NA        NA        NA        NA        NA        NA        NA
## 3     245.3     241.7     236.5     230.0     222.5     214.2     205.0
## 4        NA        NA        NA        NA        NA        NA        NA
## 5        NA        NA        NA        NA        NA        NA        NA
## 6        NA        NA        NA        NA        NA        NA        NA
##   U5MR.1976 U5MR.1977 U5MR.1978 U5MR.1979 U5MR.1980 U5MR.1981 U5MR.1982
## 1     273.6     267.8     261.6     255.5     249.1     242.7     236.2
## 2        NA        NA      91.1      84.7      78.6      73.0      67.8
## 3     195.2     184.9     173.8     161.8     148.1     132.5     115.8
## 4        NA        NA        NA        NA        NA        NA        NA
## 5        NA        NA        NA        NA     234.1     232.8     231.5
## 6        NA        NA        NA        NA        NA        NA        NA
##   U5MR.1983 U5MR.1984 U5MR.1985 U5MR.1986 U5MR.1987 U5MR.1988 U5MR.1989
## 1     229.7     222.9     216.0     209.2     202.1     195.0     187.8
## 2      62.8      58.3      54.3      50.7      47.6      44.9      42.5
## 3      99.2      83.8      71.2      61.9      55.4      51.2      48.5
## 4        NA        NA        NA        NA        NA        NA        NA
## 5     230.2     229.1     228.3     227.5     226.9     226.5     226.2
## 6        NA        NA        NA        NA        NA        NA        NA
##   U5MR.1990 U5MR.1991 U5MR.1992 U5MR.1993 U5MR.1994 U5MR.1995 U5MR.1996
## 1     181.0     174.2     167.8     162.0     156.8     152.3     148.6
## 2      40.6      38.8      37.3      36.0      34.6      33.2      31.8
## 3      46.8      45.7      44.9      44.1      43.3      42.5      41.8
## 4       8.5       7.9       7.4       6.9       6.4       6.0       5.7
## 5     226.0     225.9     226.0     225.8     225.5     224.8     224.0
## 6      25.5      24.2      23.1      21.9      20.8      19.7      18.8
##   U5MR.1997 U5MR.1998 U5MR.1999 U5MR.2000 U5MR.2001 U5MR.2002 U5MR.2003
## 1     145.5     142.6     139.9     137.0     133.8     130.3     126.8
## 2      30.3      28.9      27.5      26.2      24.9      23.6      22.5
## 3      41.1      40.6      40.2      39.7      38.9      37.8      36.5
## 4       5.3       5.0       4.8       4.6       4.4       4.2       4.1
## 5     222.6     220.8     218.9     216.7     214.1     211.7     209.2
## 6      17.9      17.0      16.2      15.5      14.8      14.1      13.5
##   U5MR.2004 U5MR.2005 U5MR.2006 U5MR.2007 U5MR.2008 U5MR.2009 U5MR.2010
## 1     123.2     119.6     116.3     113.2     110.4     107.6     105.0
## 2      21.5      20.5      19.5      18.7      17.9      17.3      16.6
## 3      35.1      33.6      32.1      30.7      29.4      28.3      27.3
## 4       4.0       3.9       3.7       3.6       3.5       3.4       3.3
## 5     206.7     203.9     200.5     196.4     192.0     187.3     182.5
## 6      12.9      12.4      11.8      11.3      10.9      10.4       9.9
##   U5MR.2011 U5MR.2012 U5MR.2013 U5MR.2014 U5MR.2015
## 1     102.3      99.5      96.7      93.9      91.1
## 2      16.0      15.5      14.9      14.4      14.0
## 3      26.6      26.1      25.8      25.6      25.5
## 4       3.2       3.1       3.0       2.9       2.8
## 5     177.3     172.2     167.1     162.2     156.9
## 6       9.5       9.1       8.7       8.4       8.1

View column names

# List the column names (CountryName plus one U5MR.<year> column per year).
colnames(dt)
##  [1] "CountryName" "U5MR.1950"   "U5MR.1951"   "U5MR.1952"   "U5MR.1953"  
##  [6] "U5MR.1954"   "U5MR.1955"   "U5MR.1956"   "U5MR.1957"   "U5MR.1958"  
## [11] "U5MR.1959"   "U5MR.1960"   "U5MR.1961"   "U5MR.1962"   "U5MR.1963"  
## [16] "U5MR.1964"   "U5MR.1965"   "U5MR.1966"   "U5MR.1967"   "U5MR.1968"  
## [21] "U5MR.1969"   "U5MR.1970"   "U5MR.1971"   "U5MR.1972"   "U5MR.1973"  
## [26] "U5MR.1974"   "U5MR.1975"   "U5MR.1976"   "U5MR.1977"   "U5MR.1978"  
## [31] "U5MR.1979"   "U5MR.1980"   "U5MR.1981"   "U5MR.1982"   "U5MR.1983"  
## [36] "U5MR.1984"   "U5MR.1985"   "U5MR.1986"   "U5MR.1987"   "U5MR.1988"  
## [41] "U5MR.1989"   "U5MR.1990"   "U5MR.1991"   "U5MR.1992"   "U5MR.1993"  
## [46] "U5MR.1994"   "U5MR.1995"   "U5MR.1996"   "U5MR.1997"   "U5MR.1998"  
## [51] "U5MR.1999"   "U5MR.2000"   "U5MR.2001"   "U5MR.2002"   "U5MR.2003"  
## [56] "U5MR.2004"   "U5MR.2005"   "U5MR.2006"   "U5MR.2007"   "U5MR.2008"  
## [61] "U5MR.2009"   "U5MR.2010"   "U5MR.2011"   "U5MR.2012"   "U5MR.2013"  
## [66] "U5MR.2014"   "U5MR.2015"

View data types

# Report the class of every column as a named vector.
sapply(X = dt, FUN = class)
## CountryName   U5MR.1950   U5MR.1951   U5MR.1952   U5MR.1953   U5MR.1954 
##    "factor"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##   U5MR.1955   U5MR.1956   U5MR.1957   U5MR.1958   U5MR.1959   U5MR.1960 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##   U5MR.1961   U5MR.1962   U5MR.1963   U5MR.1964   U5MR.1965   U5MR.1966 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##   U5MR.1967   U5MR.1968   U5MR.1969   U5MR.1970   U5MR.1971   U5MR.1972 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##   U5MR.1973   U5MR.1974   U5MR.1975   U5MR.1976   U5MR.1977   U5MR.1978 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##   U5MR.1979   U5MR.1980   U5MR.1981   U5MR.1982   U5MR.1983   U5MR.1984 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##   U5MR.1985   U5MR.1986   U5MR.1987   U5MR.1988   U5MR.1989   U5MR.1990 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##   U5MR.1991   U5MR.1992   U5MR.1993   U5MR.1994   U5MR.1995   U5MR.1996 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##   U5MR.1997   U5MR.1998   U5MR.1999   U5MR.2000   U5MR.2001   U5MR.2002 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##   U5MR.2003   U5MR.2004   U5MR.2005   U5MR.2006   U5MR.2007   U5MR.2008 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##   U5MR.2009   U5MR.2010   U5MR.2011   U5MR.2012   U5MR.2013   U5MR.2014 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##   U5MR.2015 
##   "numeric"

Transform wide to long format

# Reshape wide -> long: every U5MR.<year> column becomes a (Year, Value)
# pair, while CountryName is kept as the identifier column.
dt_tidy <- dt %>%
  gather(key = "Year", value = "Value", -CountryName)
head(dt_tidy, n = 6L)
##         CountryName      Year Value
## 1       Afghanistan U5MR.1950    NA
## 2           Albania U5MR.1950    NA
## 3           Algeria U5MR.1950    NA
## 4           Andorra U5MR.1950    NA
## 5            Angola U5MR.1950    NA
## 6 Antigua & Barbuda U5MR.1950    NA

Extract Year and convert to numeric

# Keep only the trailing digits of the former column name (e.g. "U5MR.1950"
# -> "1950") and store them as a numeric year.
dt_tidy <- mutate(
  dt_tidy,
  Year = as.numeric(str_extract(Year, "\\d+$"))
)

 Perform the analysis requested in the discussion item.

Average Child Mortality over the years

# Line chart of the yearly mean of Value across all countries.
# Missing values are dropped before averaging, so each year's mean is
# computed only from the countries that reported a value for that year.
dt_tidy %>%
  group_by(Year) %>%
  summarise(avg = mean(Value, na.rm = TRUE)) %>%
  ggplot(aes(x = Year, y = avg)) +
  geom_line() +
  theme_classic() +
  # Fixed the "AVerage" typo in the title and replaced the raw column
  # name on the y axis with a readable label.
  ggtitle("Average Child Mortality Over The Years") +
  ylab("Average under-five mortality (U5MR)")

From the graph we can see that average child mortality has decreased over the years.

Top 6 countries with the highest child mortality

# Rank countries by Value, highest first, within the most recent year in
# the data and show the first six rows.
dt_tidy %>%
  filter(Year == max(Year)) %>%
  arrange(desc(Value)) %>%
  head(n = 6L)
##                CountryName Year Value
## 1                   Angola 2015 156.9
## 2                     Chad 2015 138.7
## 3                  Somalia 2015 136.8
## 4 Central African Republic 2015 130.1
## 5             Sierra Leone 2015 120.4
## 6                     Mali 2015 114.7

Angola, Chad, Somalia, the Central African Republic, Sierra Leone, and Mali are the countries with the highest child mortality in 2015.

Top 6 countries with the lowest child mortality

# Rank countries by Value, lowest first, within the most recent year in
# the data and show the first six rows.
dt_tidy %>%
  filter(Year == max(Year)) %>%
  arrange(Value) %>%
  head(n = 6L)
##   CountryName Year Value
## 1  Luxembourg 2015   1.9
## 2     Iceland 2015   2.0
## 3     Finland 2015   2.3
## 4      Norway 2015   2.6
## 5    Slovenia 2015   2.6
## 6      Cyprus 2015   2.7

Luxembourg, Iceland, Finland, Norway, Slovenia, and Cyprus are the countries with the lowest child mortality in 2015.

Conclusion: From the analysis we can say that child mortality has decreased gradually over the years. African countries have the highest child mortality, and developed countries have the lowest child mortality. Your code should be in an R Markdown file, posted to rpubs.com, and should include narrative descriptions of your data cleanup work, analysis, and conclusions.