Education Aid and Unemployment Rate Analysis

Loading Data

# Load original education aid allocation as percentage of total aid
messy_edu_aid_per = read.csv('education_aid.csv', stringsAsFactors = FALSE, sep=',')

# Load original total aid in dollar
messy_total_aid_dollar = read.csv('total_aid.csv', stringsAsFactors = FALSE, sep=',')

# Unemployment Rate
messy_unemploy_rate = read.csv('unemployment.csv', stringsAsFactors = FALSE, sep=',')

Reshaping and Wrangle Data by Gathering Columns into Rows

# install.packages("tidyr")
library(tidyr)
library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#Transform time series data
tidy_edu_aid_per =messy_edu_aid_per %>% 
  gather('Year', 'Aid_Per', 2:44)

tidy_total_aid_dollar = messy_total_aid_dollar %>%
  gather('Year', 'Aid_Amount', 2:44)

tidy_unemploy_rate= messy_unemploy_rate %>%
  gather('Year', 'Unemploy_Rate', 2:27)

# Convert Year from boolean to Integer
tidy_edu_aid_per$Year_char = as.character(tidy_edu_aid_per$Year)
tidy_edu_aid_per$Year_char = sub('X', '', tidy_edu_aid_per$Year_char)
tidy_edu_aid_per$Year_int = as.integer(tidy_edu_aid_per$Year_char)

tidy_total_aid_dollar$Year_char = as.character(tidy_total_aid_dollar$Year)
tidy_total_aid_dollar$Year_char = sub('X', '', tidy_total_aid_dollar$Year_char)
tidy_total_aid_dollar$Year_int = as.integer(tidy_total_aid_dollar$Year_char)

tidy_unemploy_rate$Year_char = as.character(tidy_unemploy_rate$Year)
tidy_unemploy_rate$Year_char = sub('X', '', tidy_unemploy_rate$Year_char)
tidy_unemploy_rate$Year_int = as.integer(tidy_unemploy_rate$Year_char)

# Update Country Column Name
colnames(tidy_edu_aid_per)[1] = c('Country')
colnames(tidy_total_aid_dollar)[1] = c('Country')
colnames(tidy_unemploy_rate)[1] = c('Country')

Create new data frame for education aid in dollar

# Define column vectors for new data frame
Country = c(tidy_edu_aid_per$Country)
Year_int = c(tidy_edu_aid_per$Year_int)

# Education Aid in Dolloar = % of Education Aid allocation x Total Aid in Dollar
Aid_Dollar = c(tidy_edu_aid_per$Aid_Per*tidy_total_aid_dollar$Aid_Amount)

edu_aid_dollar = data.frame(Country, Year_int, Aid_Dollar)

Overall trend in education aid dollar comparing between US and Japan

library(ggplot2)

df = subset(edu_aid_dollar, edu_aid_dollar$Country == 'United States'|edu_aid_dollar$Country=='Japan' | edu_aid_dollar$Country=='Netherlands')

ggplot(data = df, aes(x=Year_int, y=Aid_Dollar/1000000000, group= Country, color = Country))+
  geom_line()+
  scale_x_continuous(breaks = seq(1970, 2010, 5))+
  scale_y_continuous(breaks = seq(0, 300, 30))+
  xlab('Year')+
  ylab('Education Aid in US Dollar (in Billion, valued in 2007)')+
  ggtitle('Comparison of Education Aid in US, Japan, and Netherlands')

## Warning in loop_apply(n, do.ply): Removed 19 rows containing missing values
## (geom_path).

Subset educaiton aid data, join with unemployment data, and export

#subset the education aid data
df_US_edu = subset(edu_aid_dollar, edu_aid_dollar$Country == 'United States' & edu_aid_dollar$Year >= 1981)
df_JPN_edu = subset(edu_aid_dollar, edu_aid_dollar$Country == 'Japan'& edu_aid_dollar$Year >= 1981)

#add unemployment rate data to respective subsets
df_US_unemploy= subset(tidy_unemploy_rate, tidy_unemploy_rate$Country == 'United States')
df_JPN_unemploy= subset(tidy_unemploy_rate, tidy_unemploy_rate$Country == 'Japan')

#join education aid and unemployment data by Year and select the key columns for output
df_US_unemploy$Year = as.integer(df_US_unemploy$Year)
df_US = dplyr::inner_join(df_US_edu, df_US_unemploy, by = "Year_int")
df_US = dplyr::select(df_US, Country.x, Year_int, Aid_Dollar, Unemploy_Rate)
df_US$Aid_Dollar = df_US$Aid_Dollar / 1000000000
colnames(df_US)[3] = c('Aid_Dollar_in_Billion')

df_JPN_unemploy$Year = as.integer(df_JPN_unemploy$Year)
df_JPN = dplyr::inner_join(df_JPN_edu, df_JPN_unemploy, by = "Year_int")
df_JPN = dplyr::select(df_JPN, Country.x, Year_int, Aid_Dollar, Unemploy_Rate)
df_JPN$Aid_Dollar = df_JPN$Aid_Dollar / 1000000000
colnames(df_JPN)[3] = c('Aid_Dollar_in_Billion')

#write these files to csv 
write.csv('US_edu_aid_unemploy.csv', x = df_US)
write.csv('JPN_edu_aid_unemploy.csv', x = df_JPN)

Education Aid and Unemployment Rate Analysis

Ian Xiao

June 2, 2015

Loading Data

Reshaping and Wrangle Data by Gathering Columns into Rows

Create new data frame for education aid in dollar

Overall trend in education aid dollar comparing between US and Japan

Subset educaiton aid data, join with unemployment data, and export