| dplyr | tidyr |
|---|---|
| select | gather |
| filter | separate |
| arrange | |
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
library(ggplot2)
ChildMortality_df = read.csv(file="01_ChildMortality.csv", header=TRUE, sep=",", skip = 6)
nrow(ChildMortality_df)
## [1] 585
colnames(ChildMortality_df) = gsub("\\.","_",colnames(ChildMortality_df))
ChildMortality_df = filter(ChildMortality_df, Uncertainty_bounds_ == "Median" &
(CountryName == "Canada" |
CountryName == "France" |
CountryName == "Germany" |
CountryName == "United Kingdom"))
nrow(ChildMortality_df)
## [1] 4
ChildDeaths1_df = select(ChildMortality_df, matches("CountryName|Under_five_Deaths|Infant_Deaths|Neonatal_Deaths"))
nrow(ChildDeaths1_df)
## [1] 4
ChildDeaths2_df = gather(ChildDeaths1_df, Type, Value, Under_five_Deaths_1950:Neonatal_Deaths_2015)
head(ChildDeaths2_df)
## CountryName Type Value
## 1 Canada Under_five_Deaths_1950 17848
## 2 Germany Under_five_Deaths_1950 NA
## 3 France Under_five_Deaths_1950 58258
## 4 United Kingdom Under_five_Deaths_1950 35795
## 5 Canada Under_five_Deaths_1951 16562
## 6 Germany Under_five_Deaths_1951 NA
ChildDeaths2_df$Type = gsub("Under_five_Deaths", "U5D", ChildDeaths2_df$Type)
ChildDeaths2_df$Type = gsub("Infant_Deaths", "IND", ChildDeaths2_df$Type)
ChildDeaths2_df$Type = gsub("Neonatal_Deaths", "NND", ChildDeaths2_df$Type)
head(ChildDeaths2_df, 20)
## CountryName Type Value
## 1 Canada U5D_1950 17848
## 2 Germany U5D_1950 NA
## 3 France U5D_1950 58258
## 4 United Kingdom U5D_1950 35795
## 5 Canada U5D_1951 16562
## 6 Germany U5D_1951 NA
## 7 France U5D_1951 46488
## 8 United Kingdom U5D_1951 28588
## 9 Canada U5D_1952 15817
## 10 Germany U5D_1952 NA
## 11 France U5D_1952 37923
## 12 United Kingdom U5D_1952 23677
## 13 Canada U5D_1953 15445
## 14 Germany U5D_1953 NA
## 15 France U5D_1953 32248
## 16 United Kingdom U5D_1953 20703
## 17 Canada U5D_1954 15304
## 18 Germany U5D_1954 NA
## 19 France U5D_1954 28699
## 20 United Kingdom U5D_1954 19374
ChildDeaths2_df = separate(ChildDeaths2_df, Type, c("Rate", "Year"), sep = "_")
head(ChildDeaths2_df, 20)
## CountryName Rate Year Value
## 1 Canada U5D 1950 17848
## 2 Germany U5D 1950 NA
## 3 France U5D 1950 58258
## 4 United Kingdom U5D 1950 35795
## 5 Canada U5D 1951 16562
## 6 Germany U5D 1951 NA
## 7 France U5D 1951 46488
## 8 United Kingdom U5D 1951 28588
## 9 Canada U5D 1952 15817
## 10 Germany U5D 1952 NA
## 11 France U5D 1952 37923
## 12 United Kingdom U5D 1952 23677
## 13 Canada U5D 1953 15445
## 14 Germany U5D 1953 NA
## 15 France U5D 1953 32248
## 16 United Kingdom U5D 1953 20703
## 17 Canada U5D 1954 15304
## 18 Germany U5D 1954 NA
## 19 France U5D 1954 28699
## 20 United Kingdom U5D 1954 19374
ChildDeaths2_df = arrange(ChildDeaths2_df, CountryName)
ChildDeaths2_dfU5D = filter(ChildDeaths2_df, Rate == "U5D" & Value != "NA" & Year>=2005)
print(ChildDeaths2_dfU5D)
## CountryName Rate Year Value
## 1 Canada U5D 2005 2122
## 2 Canada U5D 2006 2149
## 3 Canada U5D 2007 2181
## 4 Canada U5D 2008 2206
## 5 Canada U5D 2009 2217
## 6 Canada U5D 2010 2201
## 7 Canada U5D 2011 2121
## 8 Canada U5D 2012 2056
## 9 Canada U5D 2013 1978
## 10 Canada U5D 2014 1872
## 11 Canada U5D 2015 1824
## 12 France U5D 2005 3595
## 13 France U5D 2006 3529
## 14 France U5D 2007 3455
## 15 France U5D 2008 3378
## 16 France U5D 2009 3377
## 17 France U5D 2010 3376
## 18 France U5D 2011 3379
## 19 France U5D 2012 3383
## 20 France U5D 2013 3463
## 21 France U5D 2014 3462
## 22 France U5D 2015 3378
## 23 Germany U5D 2005 3161
## 24 Germany U5D 2006 3022
## 25 Germany U5D 2007 2930
## 26 Germany U5D 2008 2872
## 27 Germany U5D 2009 2827
## 28 Germany U5D 2010 2784
## 29 Germany U5D 2011 2734
## 30 Germany U5D 2012 2687
## 31 Germany U5D 2013 2641
## 32 Germany U5D 2014 2594
## 33 Germany U5D 2015 2545
## 34 United Kingdom U5D 2005 4207
## 35 United Kingdom U5D 2006 4317
## 36 United Kingdom U5D 2007 4489
## 37 United Kingdom U5D 2008 4583
## 38 United Kingdom U5D 2009 4618
## 39 United Kingdom U5D 2010 4533
## 40 United Kingdom U5D 2011 4354
## 41 United Kingdom U5D 2012 4079
## 42 United Kingdom U5D 2013 3755
## 43 United Kingdom U5D 2014 3460
## 44 United Kingdom U5D 2015 3235
ggplot(data=ChildDeaths2_dfU5D, aes(x=Year, y=Value, group=CountryName, color=CountryName)) + geom_line() +
geom_point() + ylab("# of Deaths") + ggtitle("Under 5 Child Deaths")
ChildDeaths2_df = arrange(ChildDeaths2_df, CountryName)
ChildDeaths2_dfIND = filter(ChildDeaths2_df, Rate == "IND" & Value != "NA" & Year>=2005)
print(ChildDeaths2_dfIND)
## CountryName Rate Year Value
## 1 Canada IND 2005 1847
## 2 Canada IND 2006 1871
## 3 Canada IND 2007 1898
## 4 Canada IND 2008 1918
## 5 Canada IND 2009 1960
## 6 Canada IND 2010 1939
## 7 Canada IND 2011 1852
## 8 Canada IND 2012 1822
## 9 Canada IND 2013 1743
## 10 Canada IND 2014 1636
## 11 Canada IND 2015 1587
## 12 France IND 2005 2981
## 13 France IND 2006 2908
## 14 France IND 2007 2830
## 15 France IND 2008 2750
## 16 France IND 2009 2747
## 17 France IND 2010 2745
## 18 France IND 2011 2747
## 19 France IND 2012 2749
## 20 France IND 2013 2830
## 21 France IND 2014 2829
## 22 France IND 2015 2747
## 23 Germany IND 2005 2578
## 24 Germany IND 2006 2457
## 25 Germany IND 2007 2377
## 26 Germany IND 2008 2326
## 27 Germany IND 2009 2352
## 28 Germany IND 2010 2313
## 29 Germany IND 2011 2265
## 30 Germany IND 2012 2219
## 31 Germany IND 2013 2240
## 32 Germany IND 2014 2191
## 33 Germany IND 2015 2140
## 34 United Kingdom IND 2005 3584
## 35 United Kingdom IND 2006 3692
## 36 United Kingdom IND 2007 3851
## 37 United Kingdom IND 2008 3925
## 38 United Kingdom IND 2009 4016
## 39 United Kingdom IND 2010 3917
## 40 United Kingdom IND 2011 3704
## 41 United Kingdom IND 2012 3498
## 42 United Kingdom IND 2013 3173
## 43 United Kingdom IND 2014 2880
## 44 United Kingdom IND 2015 2655
ggplot(data=ChildDeaths2_dfIND, aes(x=Year, y=Value, group=CountryName, color=CountryName)) + geom_line() +
geom_point() + ylab("# of Deaths") + ggtitle("Infant Deaths")
ChildDeaths2_df = arrange(ChildDeaths2_df, CountryName)
ChildDeaths2_dfNND = filter(ChildDeaths2_df, Rate == "NND" & Value != "NA" & Year>=2005)
print(ChildDeaths2_dfNND)
## CountryName Rate Year Value
## 1 Canada NND 2005 1393
## 2 Canada NND 2006 1439
## 3 Canada NND 2007 1451
## 4 Canada NND 2008 1457
## 5 Canada NND 2009 1489
## 6 Canada NND 2010 1464
## 7 Canada NND 2011 1418
## 8 Canada NND 2012 1356
## 9 Canada NND 2013 1288
## 10 Canada NND 2014 1227
## 11 Canada NND 2015 1181
## 12 France NND 2005 1882
## 13 France NND 2006 1807
## 14 France NND 2007 1807
## 15 France NND 2008 1806
## 16 France NND 2009 1805
## 17 France NND 2010 1803
## 18 France NND 2011 1726
## 19 France NND 2012 1728
## 20 France NND 2013 1807
## 21 France NND 2014 1807
## 22 France NND 2015 1726
## 23 Germany NND 2005 1718
## 24 Germany NND 2006 1681
## 25 Germany NND 2007 1606
## 26 Germany NND 2008 1550
## 27 Germany NND 2009 1568
## 28 Germany NND 2010 1519
## 29 Germany NND 2011 1532
## 30 Germany NND 2012 1546
## 31 Germany NND 2013 1493
## 32 Germany NND 2014 1506
## 33 Germany NND 2015 1449
## 34 United Kingdom NND 2005 2459
## 35 United Kingdom NND 2006 2510
## 36 United Kingdom NND 2007 2592
## 37 United Kingdom NND 2008 2672
## 38 United Kingdom NND 2009 2706
## 39 United Kingdom NND 2010 2670
## 40 United Kingdom NND 2011 2557
## 41 United Kingdom NND 2012 2388
## 42 United Kingdom NND 2013 2196
## 43 United Kingdom NND 2014 1946
## 44 United Kingdom NND 2015 1820
ggplot(data=ChildDeaths2_dfNND, aes(x=Year, y=Value, group=CountryName, color=CountryName)) + geom_line() +
geom_point() + ylab("# of Deaths") + ggtitle("Neonatal Deaths")
(4.a) From the graphs we can conclude that the number of child deaths (Under 5, infant and neonatal) in United Kingdom increased from 2005 to 2009 and then decreased for every year afterwards. We can see a similar pattern in Canada. It would require further data/analysis to find the root causes of why it increased from 2005 to 2009 and then decreases afterwards.