Importing and cleaning the data
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(tidyr)
## Loading required package: tidyr
ustradedat <- read.csv("C:\\Users\\Andrew\\Desktop\\Cuny\\Data Acquisition\\Project 2\\Ex3USTrade\\ustrade.csv", stringsAsFactors = FALSE)
colnames(ustradedat) <- c("month", "type", "2013exports", "2013imports", "2014exports", "2014imports", "2015exports", "2015imports")
ustrade <- ustradedat[ustradedat$type != "",]
ustrade$month[1:8] <- c("January", "January", "February", "February", "March", "March", "April", "April")
ustrade[,3:8] <- apply(ustrade[,3:8], 2, function(ustrade) as.numeric(gsub(",","", ustrade)))
str(ustrade)
## 'data.frame': 8 obs. of 8 variables:
## $ month : chr "January" "January" "February" "February" ...
## $ type : chr "Goods" "Services" "Goods" "Services" ...
## $ 2013exports: num 131228 56883 132084 56828 130093 ...
## $ 2013imports: num 191745 37995 193731 37935 186326 ...
## $ 2014exports: num 133738 59141 131768 57726 135923 ...
## $ 2014imports: num 193706 38635 193060 39270 198973 ...
## $ 2015exports: num 129292 59733 126329 59560 127183 ...
## $ 2015imports: num 192242 40371 184370 40057 198347 ...
Tidying the data
Tustrade <- ustrade %>%
gather(yeartype, amount, 3:8) %>%
separate(yeartype, c("year", "trade"), -8) %>%
mutate(year = as.numeric(year)) %>%
spread(trade, amount)
str(Tustrade)
## 'data.frame': 24 obs. of 5 variables:
## $ month : chr "April" "April" "April" "April" ...
## $ type : chr "Goods" "Goods" "Goods" "Services" ...
## $ year : num 2013 2014 2015 2013 2014 ...
## $ exports: num 131468 135556 129376 56592 59468 ...
## $ imports: num 189507 199877 190967 38318 39417 ...
2 of the questions posed by poster. Averages by month are pretty consistent.
#Calculate average goods export for each month over 3 year span
Tustrade %>%
filter(type == "Goods") %>%
group_by(month) %>%
summarise(avggoodsexports = mean(exports))
## Source: local data frame [4 x 2]
##
## month avggoodsexports
## (chr) (dbl)
## 1 April 132133.3
## 2 February 130060.3
## 3 January 131419.3
## 4 March 131066.3
#Calculate average services import for each month over 3 year span
Tustrade %>%
filter(type == "Services") %>%
group_by(month) %>%
summarise(avgservicesimports = mean(imports))
## Source: local data frame [4 x 2]
##
## month avgservicesimports
## (chr) (dbl)
## 1 April 39389.00
## 2 February 39087.33
## 3 January 39000.33
## 4 March 39142.33
Our advantages in services exported is far outweighed by our negative balance of goods exported.
#Check out total permuations
Tustrade %>%
group_by(type) %>%
summarise(avgexports = mean(exports), avgimports = mean(imports))
## Source: local data frame [2 x 3]
##
## type avgexports avgimports
## (chr) (dbl) (dbl)
## 1 Goods 131169.8 192737.58
## 2 Services 58397.0 39154.75
And, since 2013 the difference has continued to widen.
#calculate movement over years
Tustrade %>%
group_by(year, type) %>%
summarise(avgexports = mean(exports), avgimports = mean(imports)) %>%
mutate(avgtradebalance = avgexports - avgimports) %>%
group_by %>%
gather(trade, amount, 3:5) %>%
filter(trade == "avgtradebalance") %>%
spread(year, amount) %>%
mutate(unitchange13to15 = `2015` - `2013`)
## Source: local data frame [2 x 6]
##
## type trade 2013 2014 2015 unitchange13to15
## (chr) (fctr) (dbl) (dbl) (dbl) (dbl)
## 1 Goods avgtradebalance -59109 -62157.75 -63436.50 -4327.50
## 2 Services avgtradebalance 18691 19735.50 19300.25 609.25