Loading Data

messy_edu_aid = read.csv('education_aid.csv', stringsAsFactors = FALSE, sep=',')

Reshaping and Wrangle Data by Gathering Columns into Rows

# install.packages("tidyr")
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
tidy_df = messy_edu_aid %>% 
  gather('Year', 'Aid', 2:44)

# Convert Year from boolean to Integer
tidy_df$Year_char = as.character(tidy_df$Year)
tidy_df$Year_char = sub('X', '', tidy_df$Year_char)
tidy_df$Year_int = as.integer(tidy_df$Year_char)

# Update Country Column Name
colnames(tidy_df)[1] = c('Country')

What is the overall trend in education aid allocation as % of total aid?

#install.packages('ggplot2')
library(ggplot2)


tidy_df_sub = subset(tidy_df, Country!= '')

ggplot(data = tidy_df_sub,aes(x = Year_int, y = Aid, group = Country, color = Country))+
  geom_line()+
  scale_x_continuous(breaks = seq(1970, 2010, 10))+
  xlab('Year')+
  ylab('Education Aid Allocation as % of Total Aid')+
  ggtitle('Trend in Education Aid Allocation as % of Total Aid')
## Warning in loop_apply(n, do.ply): Removed 237 rows containing missing
## values (geom_path).

What is the overall trend in recent years after 1997?

tidy_df_after_1997 = subset(tidy_df, (Year_int > 1997 & Year_int < 2008)& Country!= '')

ggplot(data = tidy_df_after_1997,aes(x = as.factor(Year_int), y = Aid, group = Country, color = Country))+
  geom_line()+
  xlab('Year')+
  ylab('Education Aid Allocation as % of Total Aid')+
  ggtitle('Recent Trend in Education Aid Allocation as % of Total Aid')

# Numerical Comparison of Aid Allocation by Country after 1997
by(tidy_df_after_1997$Aid, tidy_df_after_1997$Country, summary)
## tidy_df_after_1997$Country: Australia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   8.053  10.500  12.290  15.720  21.000  28.320 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Austria
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   29.39   36.93   39.50   38.99   40.76   48.95 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Belgium
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.91   19.86   20.52   21.26   21.43   25.64 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Canada
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.03   15.83   18.85   18.75   20.04   25.99 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Denmark
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.579   5.022   6.594   6.994   9.502  13.170 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Finland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   7.610   9.736  12.700  12.840  14.250  24.260 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: France
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   36.06   37.43   40.36   40.37   42.02   47.32 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Germany
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   8.857  22.570  23.950  22.460  24.740  26.420 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Greece
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   7.832   9.093  16.960  18.480  24.530  39.940 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Ireland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15.10   18.75   21.93   22.88   28.20   29.85 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Italy
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.727   7.430  10.620  12.540  17.310  24.340 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Japan
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   7.586   8.278  10.260  10.690  12.780  15.190 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Luxembourg
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   15.63   21.21   23.76   24.29   29.14   29.93       3 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Netherlands
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   16.92   17.97   18.50   20.00   21.14   29.15 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: New Zealand
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   32.43   35.74   42.73   43.21   48.47   58.17 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Norway
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   9.296  10.770  13.720  12.840  14.400  15.420 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Portugal
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.65   19.61   25.42   27.51   35.62   39.96 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Spain
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.43   15.50   16.47   18.14   18.90   33.29 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Sweden
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.403   8.574   9.446   9.066  10.550  11.000 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: Switzerland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   6.103   6.644   7.109   7.110   7.360   8.412 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: United Kingdom
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   6.715  12.010  14.170  13.680  16.020  18.010 
## -------------------------------------------------------- 
## tidy_df_after_1997$Country: United States
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.339   3.080   3.679   3.838   4.323   5.747

What is the difference in edcuation aid allocation between developed countries in North America and Asia?

tidy_df_US_JPN = subset(tidy_df, (Country == 'United States' | Country == 'Japan'))

summary(tidy_df_US_JPN$Year_int)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1967    1977    1988    1988    1998    2008       2
ggplot(data = tidy_df_US_JPN,aes(x = as.factor(Year_int), y = Aid, group = Country, color = Country))+
  geom_line()+
  scale_x_discrete(breaks = seq(1970, 2008, 5))+
  xlab('Year')+
  ylab('Education Aid Allocation as % of Total Aid')+
  ggtitle('Aid Allocation Comparison between US and Japan')
## Warning in loop_apply(n, do.ply): Removed 12 rows containing missing values
## (geom_path).

# Numerical Comparison of Aid Allocation by US and Japan
by(tidy_df_US_JPN$Aid, tidy_df_US_JPN$Country, summary)
## tidy_df_US_JPN$Country: Japan
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.084   3.728   7.988   7.690  10.240  15.190       6 
## -------------------------------------------------------- 
## tidy_df_US_JPN$Country: United States
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.339   5.154   7.992  11.160  11.220  69.340       8
# Next step: Calculate education aid in dollar by including total aid received in each country between 1967 - 2008