Loading Data
messy_edu_aid = read.csv('education_aid.csv', stringsAsFactors = FALSE, sep=',')
Reshaping and Wrangle Data by Gathering Columns into Rows
# install.packages("tidyr")
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
tidy_df = messy_edu_aid %>%
gather('Year', 'Aid', 2:44)
# Convert Year from boolean to Integer
tidy_df$Year_char = as.character(tidy_df$Year)
tidy_df$Year_char = sub('X', '', tidy_df$Year_char)
tidy_df$Year_int = as.integer(tidy_df$Year_char)
# Update Country Column Name
colnames(tidy_df)[1] = c('Country')
What is the overall trend in education aid allocation as % of total aid?
#install.packages('ggplot2')
library(ggplot2)
tidy_df_sub = subset(tidy_df, Country!= '')
ggplot(data = tidy_df_sub,aes(x = Year_int, y = Aid, group = Country, color = Country))+
geom_line()+
scale_x_continuous(breaks = seq(1970, 2010, 10))+
xlab('Year')+
ylab('Education Aid Allocation as % of Total Aid')+
ggtitle('Trend in Education Aid Allocation as % of Total Aid')
## Warning in loop_apply(n, do.ply): Removed 237 rows containing missing
## values (geom_path).

What is the overall trend in recent years after 1997?
tidy_df_after_1997 = subset(tidy_df, (Year_int > 1997 & Year_int < 2008)& Country!= '')
ggplot(data = tidy_df_after_1997,aes(x = as.factor(Year_int), y = Aid, group = Country, color = Country))+
geom_line()+
xlab('Year')+
ylab('Education Aid Allocation as % of Total Aid')+
ggtitle('Recent Trend in Education Aid Allocation as % of Total Aid')

# Numerical Comparison of Aid Allocation by Country after 1997
by(tidy_df_after_1997$Aid, tidy_df_after_1997$Country, summary)
## tidy_df_after_1997$Country: Australia
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.053 10.500 12.290 15.720 21.000 28.320
## --------------------------------------------------------
## tidy_df_after_1997$Country: Austria
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 29.39 36.93 39.50 38.99 40.76 48.95
## --------------------------------------------------------
## tidy_df_after_1997$Country: Belgium
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.91 19.86 20.52 21.26 21.43 25.64
## --------------------------------------------------------
## tidy_df_after_1997$Country: Canada
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 11.03 15.83 18.85 18.75 20.04 25.99
## --------------------------------------------------------
## tidy_df_after_1997$Country: Denmark
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.579 5.022 6.594 6.994 9.502 13.170
## --------------------------------------------------------
## tidy_df_after_1997$Country: Finland
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.610 9.736 12.700 12.840 14.250 24.260
## --------------------------------------------------------
## tidy_df_after_1997$Country: France
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 36.06 37.43 40.36 40.37 42.02 47.32
## --------------------------------------------------------
## tidy_df_after_1997$Country: Germany
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.857 22.570 23.950 22.460 24.740 26.420
## --------------------------------------------------------
## tidy_df_after_1997$Country: Greece
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.832 9.093 16.960 18.480 24.530 39.940
## --------------------------------------------------------
## tidy_df_after_1997$Country: Ireland
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.10 18.75 21.93 22.88 28.20 29.85
## --------------------------------------------------------
## tidy_df_after_1997$Country: Italy
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.727 7.430 10.620 12.540 17.310 24.340
## --------------------------------------------------------
## tidy_df_after_1997$Country: Japan
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.586 8.278 10.260 10.690 12.780 15.190
## --------------------------------------------------------
## tidy_df_after_1997$Country: Luxembourg
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 15.63 21.21 23.76 24.29 29.14 29.93 3
## --------------------------------------------------------
## tidy_df_after_1997$Country: Netherlands
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 16.92 17.97 18.50 20.00 21.14 29.15
## --------------------------------------------------------
## tidy_df_after_1997$Country: New Zealand
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 32.43 35.74 42.73 43.21 48.47 58.17
## --------------------------------------------------------
## tidy_df_after_1997$Country: Norway
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.296 10.770 13.720 12.840 14.400 15.420
## --------------------------------------------------------
## tidy_df_after_1997$Country: Portugal
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.65 19.61 25.42 27.51 35.62 39.96
## --------------------------------------------------------
## tidy_df_after_1997$Country: Spain
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 11.43 15.50 16.47 18.14 18.90 33.29
## --------------------------------------------------------
## tidy_df_after_1997$Country: Sweden
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.403 8.574 9.446 9.066 10.550 11.000
## --------------------------------------------------------
## tidy_df_after_1997$Country: Switzerland
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.103 6.644 7.109 7.110 7.360 8.412
## --------------------------------------------------------
## tidy_df_after_1997$Country: United Kingdom
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.715 12.010 14.170 13.680 16.020 18.010
## --------------------------------------------------------
## tidy_df_after_1997$Country: United States
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.339 3.080 3.679 3.838 4.323 5.747
What is the difference in edcuation aid allocation between developed countries in North America and Asia?
tidy_df_US_JPN = subset(tidy_df, (Country == 'United States' | Country == 'Japan'))
summary(tidy_df_US_JPN$Year_int)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1967 1977 1988 1988 1998 2008 2
ggplot(data = tidy_df_US_JPN,aes(x = as.factor(Year_int), y = Aid, group = Country, color = Country))+
geom_line()+
scale_x_discrete(breaks = seq(1970, 2008, 5))+
xlab('Year')+
ylab('Education Aid Allocation as % of Total Aid')+
ggtitle('Aid Allocation Comparison between US and Japan')
## Warning in loop_apply(n, do.ply): Removed 12 rows containing missing values
## (geom_path).

# Numerical Comparison of Aid Allocation by US and Japan
by(tidy_df_US_JPN$Aid, tidy_df_US_JPN$Country, summary)
## tidy_df_US_JPN$Country: Japan
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.084 3.728 7.988 7.690 10.240 15.190 6
## --------------------------------------------------------
## tidy_df_US_JPN$Country: United States
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.339 5.154 7.992 11.160 11.220 69.340 8
# Next step: Calculate education aid in dollar by including total aid received in each country between 1967 - 2008