Questions for Analysis:

1). Calculate the average goods export for each month over the three-year span.

2). Calculate the average goods import for each month over the three-year span.

3). Calculate the average services export for each month over the three-year span.

4). Calculate the average services import for each month over the three-year span.

## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Import Data

Thanks to this artice from stackoverflow, which gived me the idea of replacing missing values with NA.

http://stackoverflow.com/questions/13822801/whats-the-best-way-to-replace-missing-values-with-na-when-reading-in-a-csv

data_set <- read.csv("https://raw.githubusercontent.com/jasonjgy2000/IS607/master/Projects/Project%202/International%20Trade%20in%20Goods%20and%20Services.csv",skip = 2,na.strings = c("","NA"))
head(data_set)
##          X      X.1     X.2     X.3     X.4     X.5     X.6     X.7
## 1  January   Goods  131,228 191,745 133,738 193,706 129,292 192,242
## 2     <NA> Services   56883  37,995  59,141  38,635  59,733  40,371
## 3     <NA>     <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
## 4 February   Goods   132084 193,731 131,768 193,060 126,329 184,370
## 5     <NA> Services  56,828  37,935  57,726  39,270  59,560  40,057
## 6     <NA>     <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>

Data Cleansing

#assigning user-friendly column names
colnames(data_set) <- c("Period","GoodsType","2013-Export","2013-Import","2014-Export","2014-Import","2015-Export","2015-Import")
# removing empty rows
data_set <- filter(data_set,!is.na(data_set$GoodsType))
# fill missing data - Thanks Joy! 
name_rows<-seq(from = 1, to = nrow(data_set), by=2)
nameless_rows<-seq(from = 2, to = nrow(data_set), by=2)
data_set[nameless_rows,1]<-data_set[name_rows,1]
head(tbl_df(data_set))
## Source: local data frame [6 x 8]
## 
##     Period GoodsType 2013-Export 2013-Import 2014-Export 2014-Import
##     (fctr)    (fctr)      (fctr)      (fctr)      (fctr)      (fctr)
## 1  January    Goods      131,228     191,745     133,738     193,706
## 2  January  Services       56883      37,995      59,141      38,635
## 3 February    Goods       132084     193,731     131,768     193,060
## 4 February  Services      56,828      37,935      57,726      39,270
## 5    March    Goods      130,093     186,326     135,923     198,973
## 6    March  Services      56,647      37,938      58,837      38,908
## Variables not shown: 2015-Export (fctr), 2015-Import (fctr)

Data Transformation

data_set<- data_set %>%  gather("Year","Amount",3:8) %>%  separate("Year",c("Year","Transaction"),sep="-")
## Warning: attributes are not identical across measure variables; they will
## be dropped
# Remove "," from Amount figures. Thanks Youqing!
data_set$Amount <- as.numeric(gsub(',','',data_set$Amount))
head(tbl_df(data_set))
## Source: local data frame [6 x 5]
## 
##     Period GoodsType  Year Transaction Amount
##     (fctr)    (fctr) (chr)       (chr)  (dbl)
## 1  January    Goods   2013      Export 131228
## 2  January  Services  2013      Export  56883
## 3 February    Goods   2013      Export 132084
## 4 February  Services  2013      Export  56828
## 5    March    Goods   2013      Export 130093
## 6    March  Services  2013      Export  56647

Analysis

Question 1 : Calculate the average goods export for each month over the three-year span.

mMean <- data_set %>% filter(!GoodsType == "Services" & Transaction =="Export") %>% group_by(Period) %>% summarise(pmean = mean(Amount))
mMean
## Source: local data frame [4 x 2]
## 
##     Period    pmean
##     (fctr)    (dbl)
## 1    April 132133.3
## 2 February 130060.3
## 3  January 131419.3
## 4    March 131066.3

Question 2 : Calculate the average goods import for each month over the three-year span.

mMean <- data_set %>% filter(!GoodsType == "Services" & Transaction =="Import") %>% group_by(Period) %>% summarise(pmean = mean(Amount))
mMean
## Source: local data frame [4 x 2]
## 
##     Period    pmean
##     (fctr)    (dbl)
## 1    April 193450.3
## 2 February 190387.0
## 3  January 192564.3
## 4    March 194548.7

Question 3 : Calculate the average services export for each month over the three-year span.

mMean <- data_set %>% filter(GoodsType == "Services" & Transaction =="Export") %>% group_by(Period) %>% summarise(pmean = mean(Amount))
mMean
## Source: local data frame [4 x 2]
## 
##     Period    pmean
##     (fctr)    (dbl)
## 1    April 58608.67
## 2 February 58038.00
## 3  January 58585.67
## 4    March 58355.67

Question 4 : Calculate the average services import for each month over the three-year span.

mMean <- data_set %>% filter(GoodsType == "Services" & Transaction =="Import") %>% group_by(Period) %>% summarise(pmean = mean(Amount))
mMean
## Source: local data frame [4 x 2]
## 
##     Period    pmean
##     (fctr)    (dbl)
## 1    April 39389.00
## 2 February 39087.33
## 3  January 39000.33
## 4    March 39142.33