EC313 Lecture and Class, Week 7

J. James Reade

24-25/02/2015

Introduction

Forecast Competition: Inflation…

library(forecast)   # for auto.arima() and forecast()
cpi <- read.csv("cpi_090215.csv",stringsAsFactors=F)
cpi.t <- ts(cpi$Actual[order(cpi$DateTime)],start=c(2007,1),freq=12)
cpi.t.a <- auto.arima(cpi.t)
cpi.t.f <- forecast(cpi.t.a,h=10)
plot(cpi.t.f,main="Forecasts of CPI Inflation",ylab="Inflation (%, YoY)",xlab="Date")

cpi.t.f$mean
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2015 0.5015949 0.6033607 0.6889686 0.7836768 0.9564424 0.8226495 0.9617996
##            Aug       Sep       Oct
## 2015 1.0601266 1.2608149 1.2895016
cpi23 <- read.csv("cpi_230215.csv",stringsAsFactors=F)
tail(cpi23,1)
##             DateTime Actual Consensus Previous
## 97 20070213 09:30:00    2.7         3       NA
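
The last row here is the February 2007 release, which suggests the file arrived sorted newest-first; re-ordering by DateTime (a quick sketch) makes tail() return the latest outturn, to set against the first forecast above:

cpi23 <- cpi23[order(cpi23$DateTime),] # sort oldest-first
tail(cpi23,1)$Actual # latest CPI release, to compare with cpi.t.f$mean[1]
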
getSymbols("GBRCPIALLMINMEI",src="FRED",return.class="zoo") # CPI since 1955 (from FRED)
##     As of 0.4-0, 'getSymbols' uses env=parent.frame() and
##  auto.assign=TRUE by default.
## 
##  This  behavior  will be  phased out in 0.5-0  when the call  will
##  default to use auto.assign=FALSE. getOption("getSymbols.env") and 
##  getOptions("getSymbols.auto.assign") are now checked for alternate defaults
## 
##  This message is shown once per session and may be disabled by setting 
##  options("getSymbols.warning4.0"=FALSE). See ?getSymbol for more details
## [1] "GBRCPIALLMINMEI"
ukcpi <- as.numeric(coredata(diff(log(GBRCPIALLMINMEI),differences=1,lag=12)))
ukcpi.t <- ts(ukcpi,start=c(1956,1),freq=12)
tsdisplay(ukcpi.t)

tsdisplay(diff(ukcpi.t,differences=1))
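
The level series' ACF dies out very slowly, which is what motivates differencing; forecast's ndiffs() gives a unit-root-test-based check (a sketch):

ndiffs(ukcpi.t) # suggested number of first differences (most likely 1 here)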

# candidate ARIMA(p,1,q) specifications for the inflation series
inf.ar1 <- Arima(ukcpi.t,order=c(2,1,2))
inf.ar2 <- Arima(ukcpi.t,order=c(3,1,2))
inf.ar3 <- Arima(ukcpi.t,order=c(1,1,2))
inf.ar4 <- Arima(ukcpi.t,order=c(2,1,3))
inf.ar5 <- Arima(ukcpi.t,order=c(2,1,1))
aiccs <- c(inf.ar1$aicc,inf.ar2$aicc,inf.ar3$aicc,inf.ar4$aicc,inf.ar5$aicc) # collect AICc values
inf.auto <- auto.arima(ukcpi.t)
inf.forc <- forecast(inf.ar1,h=60)
inf.auto.forc <- forecast(inf.auto,h=60)
plot(ukcpi.t,xlim=range(2008,2020),ylim=range(0,0.05),main="UK CPI Inflation",ylab="%",xlab="Date")
lines(inf.forc$mean,col="blue")
lines(inf.auto.forc$mean,col="red")
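
To see which specification the information criterion prefers, print the AICc values and pick out the minimum, with auto.arima's choice alongside for comparison:

aiccs            # AICc for each of the five candidates
which.min(aiccs) # position of the lowest-AICc specification
inf.auto         # what auto.arima() selected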

R: What is it?

Lots of online help: Resources

Exam Feedback

Today: Further Adventures in R

Loading Data: Quandl

library(Quandl)     # for Quandl()
ftse <- Quandl("YAHOO/INDEX_FTSE")
ftse <- ftse[order(ftse$Date),]                   # sort oldest-first
ftse.0 <- ftse[ftse$Date<as.Date("2015-02-13"),]  # subsample ending 12 Feb 2015
ftse.auto <- auto.arima(ftse$Close)               # fitted on the full sample of closing prices
ftse.forc <- forecast(ftse.auto,h=50)
plot(ftse.forc,include=100)

ftse.forc$mean[seq(1,35,5)]
## [1] 6948.161 6949.152 6952.735 6956.367 6960.000 6963.632 6967.264
ftse[ftse$Date==as.Date("2015-02-13") | ftse$Date==as.Date("2015-02-20"),]
##         Date   Open   High    Low  Close    Volume Adjusted Close
## 8 2015-02-13 6828.1 6887.6 6828.1 6873.5         0         6873.5
## 3 2015-02-20 6888.9 6920.5 6884.8 6915.2 764028800         6915.2
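
Treating each forecast step as one trading day, a rough sketch of the week-ahead percentage forecast error against the 20 February close:

outturn <- ftse$Close[ftse$Date==as.Date("2015-02-20")]
100*(ftse.forc$mean[5]-outturn)/outturn # % error of the 5-step-ahead forecast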

Loading Data: quantmod

getSymbols("YHOO",src="google",return.class="zoo") # from google finance
## [1] "YHOO"
plot(YHOO)

getSymbols("GOOG",src="yahoo",return.class="zoo") # from yahoo finance 
## [1] "GOOG"
plot(GOOG)

getSymbols("DEXJPUS",src="FRED",return.class="zoo") # FX rates from FRED 
## [1] "DEXJPUS"
plot(DEXJPUS)

Loading Data: Via CSV or Excel

gs <- read.csv("ukgs_1700_2016.csv",stringsAsFactors=F) # UK government spending, 1700-2016
head(gs) # note: the "£" in the original header has been mangled to ".163." on import
##   Year GDP.UK...163..billion Population.UK.million
## 1 1700               0.06058                 9.100
## 2 1701                0.0608                 9.129
## 3 1702               0.06102                 9.158
## 4 1703               0.06124                 9.188
## 5 1704               0.06146                 9.217
## 6 1705               0.06168                 9.247
##   Total.Spending.total.percent.GDP X
## 1                             5.28 a
## 2                             5.66 a
## 3                             8.21 a
## 4                             8.68 a
## 5                             8.99 a
## 6                             9.52 a
str(gs)
## 'data.frame':    317 obs. of  5 variables:
##  $ Year                            : int  1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 ...
##  $ GDP.UK...163..billion           : chr  "0.06058" "0.0608" "0.06102" "0.06124" ...
##  $ Population.UK.million           : num  9.1 9.13 9.16 9.19 9.22 ...
##  $ Total.Spending.total.percent.GDP: num  5.28 5.66 8.21 8.68 8.99 ...
##  $ X                               : chr  "a" "a" "a" "a" ...
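
Note the GDP column has come in as character, presumably due to stray non-numeric entries in the CSV; a hedged fix is to coerce it to numeric, accepting NAs where coercion fails:

gs$GDP.UK...163..billion <- as.numeric(gs$GDP.UK...163..billion) # non-numeric entries become NA, with a warning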

Plotting Data: Line Plots

plot(gs$Year,gs$Total.Spending.total.percent.GDP,type="l")

Plotting Data: Scatter Plots

getSymbols('JTSJOR',src='FRED',return.class="zoo")
## [1] "JTSJOR"
getSymbols('UNRATE',src='FRED',return.class="zoo")
## [1] "UNRATE"
getSymbols('CPIAUCSL',src='FRED',return.class="zoo")
## [1] "CPIAUCSL"
plot(UNRATE,JTSJOR)

infl <- diff(log(CPIAUCSL),differences=1,lag=12) # 12-month log difference, i.e. annual inflation
plot(UNRATE,infl,main="US Phillips Curve?",ylab="Inflation",xlab="Unemployment Rate")

getSymbols('JPNURHARMMDSMEI',src='FRED',return.class="zoo")
## [1] "JPNURHARMMDSMEI"
getSymbols('JPNCPIALLMINMEI',src='FRED',return.class="zoo")
## [1] "JPNCPIALLMINMEI"
infl.j <- diff(log(JPNCPIALLMINMEI),differences=1,lag=12)
plot(JPNURHARMMDSMEI,infl.j,main="Japan's Phillips Curve",ylab="Inflation (CPI)",xlab="Unemployment",pch=16)

plot(-JPNURHARMMDSMEI,infl.j,main="Japan's Phillips Curve",ylab="Inflation (CPI)",xlab="-Unemployment",pch=16) # negate unemployment so any Phillips relationship slopes upwards

plot(-JPNURHARMMDSMEI[index(JPNURHARMMDSMEI)>=as.Date("1980-01-01") & index(JPNURHARMMDSMEI)<=as.Date("2005-08-01")],
     infl.j[index(infl.j)>=as.Date("1980-01-01") & index(infl.j)<=as.Date("2005-08-01")],main="Japan's Phillips Curve",ylab="Inflation (CPI)",xlab="-Unemployment",pch=16)
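
The same restriction can be written more compactly with window() (a sketch):

u.sub <- window(JPNURHARMMDSMEI,start=as.Date("1980-01-01"),end=as.Date("2005-08-01"))
i.sub <- window(infl.j,start=as.Date("1980-01-01"),end=as.Date("2005-08-01"))
plot(-u.sub,i.sub,main="Japan's Phillips Curve",ylab="Inflation (CPI)",xlab="-Unemployment",pch=16)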

Plotting Data: Distributions

hist(infl.j)

hist(infl.j,breaks=50)

Modelling: Linear Regression
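
A minimal sketch of lm() using the US Phillips-curve data plotted above; the merge() step is an assumption (not from the slides) that aligns the two monthly series on common dates:

us <- merge(UNRATE,infl,all=FALSE) # keep only dates present in both series
colnames(us) <- c("unrate","infl")
pc <- lm(infl ~ unrate, data=as.data.frame(us))
summary(pc) # a negative slope would be consistent with a Phillips curve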

Modelling: Model Selection

Modelling: arx

Model Selection: From Lecture 4…

Example of Model Selection

getSymbols('JTSQUR',src='FRED',return.class="zoo") #quits
## [1] "JTSQUR"
getSymbols('JTSHIL',src='FRED',return.class="zoo") #total hires
## [1] "JTSHIL"
getSymbols('LNU01300000',src='FRED',return.class="zoo") #participation rate
## [1] "LNU01300000"
getSymbols('UEMPMEAN',src='FRED',return.class="zoo") #average duration of unemployment
## [1] "UEMPMEAN"
plot(JTSJOR,main="Vacancy Rate through time",ylab="%",xlab="Date")

Current and lag of every variable:

partrate <- window(LNU01300000, start="2000-12-01",end="2014-12-01")
uempmean <- window(UEMPMEAN, start="2000-12-01",end="2014-12-01")
unrate <- window(UNRATE, start="2000-12-01",end="2014-12-01")
vars <- data.frame(partrate,uempmean,unrate,JTSHIL,JTSJOR,JTSQUR,
                   # lag(x,-1) shifts each zoo series back one month; na.pad keeps the length
                   "partrate.1"=lag(partrate,-1,na.pad=TRUE),
                   "uempmean.1"=lag(uempmean,-1,na.pad=TRUE),
                   "unrate.1"=lag(unrate,-1,na.pad=TRUE),
                   "JTSHIL.1"=lag(JTSHIL,-1,na.pad=TRUE),
                   "JTSQUR.1"=lag(JTSQUR,-1,na.pad=TRUE),
                   "JTSJOR.1"=lag(JTSJOR,-1,na.pad=TRUE))

vac.mod <- lm(JTSJOR ~ JTSJOR.1 + unrate + unrate.1 + partrate + partrate.1 + 
                JTSHIL + JTSHIL.1 + JTSQUR + JTSQUR.1 + uempmean + uempmean.1, data=vars)
summary(vac.mod)
## 
## Call:
## lm(formula = JTSJOR ~ JTSJOR.1 + unrate + unrate.1 + partrate + 
##     partrate.1 + JTSHIL + JTSHIL.1 + JTSQUR + JTSQUR.1 + uempmean + 
##     uempmean.1, data = vars)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37444 -0.06651  0.00058  0.07390  0.34219 
## 
## Coefficients:
##                Estimate  Std. Error t value            Pr(>|t|)    
## (Intercept)  2.09266489  1.34423754   1.557              0.1216    
## JTSJOR.1     0.58912942  0.05940339   9.917 <0.0000000000000002 ***
## unrate       0.03834411  0.06948536   0.552              0.5819    
## unrate.1    -0.06202105  0.06767094  -0.917              0.3608    
## partrate    -0.01055820  0.03290480  -0.321              0.7487    
## partrate.1  -0.02959515  0.03351824  -0.883              0.3786    
## JTSHIL       0.00023295  0.00007807   2.984              0.0033 ** 
## JTSHIL.1     0.00002741  0.00008522   0.322              0.7481    
## JTSQUR      -0.10079557  0.15295015  -0.659              0.5109    
## JTSQUR.1     0.29100532  0.14559861   1.999              0.0474 *  
## uempmean     0.00405668  0.01464278   0.277              0.7821    
## uempmean.1   0.00442336  0.01385946   0.319              0.7500    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1199 on 156 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.9249, Adjusted R-squared:  0.9196 
## F-statistic: 174.6 on 11 and 156 DF,  p-value: < 0.00000000000000022

Stepwise selection:

vac.step <- step(vac.mod)
## Start:  AIC=-701.18
## JTSJOR ~ JTSJOR.1 + unrate + unrate.1 + partrate + partrate.1 + 
##     JTSHIL + JTSHIL.1 + JTSQUR + JTSQUR.1 + uempmean + uempmean.1
## 
##              Df Sum of Sq    RSS     AIC
## - uempmean    1   0.00110 2.2433 -703.09
## - uempmean.1  1   0.00146 2.2436 -703.07
## - partrate    1   0.00148 2.2436 -703.06
## - JTSHIL.1    1   0.00149 2.2437 -703.06
## - unrate      1   0.00438 2.2465 -702.85
## - JTSQUR      1   0.00624 2.2484 -702.71
## - partrate.1  1   0.01121 2.2534 -702.34
## - unrate.1    1   0.01207 2.2542 -702.27
## <none>                    2.2422 -701.18
## - JTSQUR.1    1   0.05742 2.2996 -698.93
## - JTSHIL      1   0.12798 2.3701 -693.85
## - JTSJOR.1    1   1.41365 3.6558 -621.04
## 
## Step:  AIC=-703.09
## JTSJOR ~ JTSJOR.1 + unrate + unrate.1 + partrate + partrate.1 + 
##     JTSHIL + JTSHIL.1 + JTSQUR + JTSQUR.1 + uempmean.1
## 
##              Df Sum of Sq    RSS     AIC
## - JTSHIL.1    1   0.00114 2.2444 -705.01
## - partrate    1   0.00134 2.2446 -704.99
## - unrate      1   0.00397 2.2472 -704.80
## - JTSQUR      1   0.00590 2.2492 -704.65
## - unrate.1    1   0.01112 2.2544 -704.26
## - partrate.1  1   0.01253 2.2558 -704.16
## <none>                    2.2433 -703.09
## - JTSQUR.1    1   0.05686 2.3001 -700.89
## - uempmean.1  1   0.06173 2.3050 -700.53
## - JTSHIL      1   0.13201 2.3753 -695.49
## - JTSJOR.1    1   1.42575 3.6690 -622.44
## 
## Step:  AIC=-705.01
## JTSJOR ~ JTSJOR.1 + unrate + unrate.1 + partrate + partrate.1 + 
##     JTSHIL + JTSQUR + JTSQUR.1 + uempmean.1
## 
##              Df Sum of Sq    RSS     AIC
## - partrate    1   0.00124 2.2456 -706.92
## - unrate      1   0.00327 2.2477 -706.76
## - JTSQUR      1   0.00591 2.2503 -706.57
## - unrate.1    1   0.01037 2.2548 -706.23
## - partrate.1  1   0.01229 2.2567 -706.09
## <none>                    2.2444 -705.01
## - uempmean.1  1   0.06657 2.3110 -702.10
## - JTSQUR.1    1   0.08556 2.3300 -700.72
## - JTSHIL      1   0.14729 2.3917 -696.33
## - JTSJOR.1    1   1.44845 3.6929 -623.35
## 
## Step:  AIC=-706.92
## JTSJOR ~ JTSJOR.1 + unrate + unrate.1 + partrate.1 + JTSHIL + 
##     JTSQUR + JTSQUR.1 + uempmean.1
## 
##              Df Sum of Sq    RSS     AIC
## - unrate      1   0.00269 2.2483 -708.71
## - JTSQUR      1   0.00569 2.2513 -708.49
## - unrate.1    1   0.00957 2.2552 -708.20
## <none>                    2.2456 -706.92
## - partrate.1  1   0.05835 2.3040 -704.61
## - uempmean.1  1   0.07168 2.3173 -703.64
## - JTSQUR.1    1   0.08433 2.3300 -702.72
## - JTSHIL      1   0.14608 2.3917 -698.33
## - JTSJOR.1    1   1.46549 3.7111 -624.52
## 
## Step:  AIC=-708.71
## JTSJOR ~ JTSJOR.1 + unrate.1 + partrate.1 + JTSHIL + JTSQUR + 
##     JTSQUR.1 + uempmean.1
## 
##              Df Sum of Sq    RSS     AIC
## - JTSQUR      1   0.00620 2.2545 -710.25
## - unrate.1    1   0.01965 2.2680 -709.25
## <none>                    2.2483 -708.71
## - partrate.1  1   0.05791 2.3062 -706.44
## - uempmean.1  1   0.06900 2.3173 -705.64
## - JTSQUR.1    1   0.08172 2.3301 -704.72
## - JTSHIL      1   0.14465 2.3930 -700.24
## - JTSJOR.1    1   1.48892 3.7373 -625.34
## 
## Step:  AIC=-710.25
## JTSJOR ~ JTSJOR.1 + unrate.1 + partrate.1 + JTSHIL + JTSQUR.1 + 
##     uempmean.1
## 
##              Df Sum of Sq    RSS     AIC
## - unrate.1    1   0.02018 2.2747 -710.75
## <none>                    2.2545 -710.25
## - partrate.1  1   0.06166 2.3162 -707.72
## - JTSQUR.1    1   0.07682 2.3314 -706.62
## - uempmean.1  1   0.07834 2.3329 -706.51
## - JTSHIL      1   0.18045 2.4350 -699.32
## - JTSJOR.1    1   1.52533 3.7799 -625.44
## 
## Step:  AIC=-710.75
## JTSJOR ~ JTSJOR.1 + partrate.1 + JTSHIL + JTSQUR.1 + uempmean.1
## 
##              Df Sum of Sq    RSS     AIC
## <none>                    2.2747 -710.75
## - uempmean.1  1   0.05937 2.3341 -708.43
## - partrate.1  1   0.09332 2.3680 -706.00
## - JTSQUR.1    1   0.11567 2.3904 -704.42
## - JTSHIL      1   0.25766 2.5324 -694.73
## - JTSJOR.1    1   1.97031 4.2450 -607.94
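
The object step() returns is an ordinary lm fit of the selected specification, so the usual extractors apply:

summary(vac.step) # coefficients of the stepwise-selected model
AIC(vac.step)     # note: differs from step()'s reported AIC by an additive constant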

GETS (general-to-specific) selection:

# drop row 1 (NA after lagging) and gather the candidate regressors into a matrix for arx()
xregs <- cbind(vars$unrate[2:NROW(vars)],vars$unrate.1[2:NROW(vars)],
               vars$partrate[2:NROW(vars)],vars$partrate.1[2:NROW(vars)],
               vars$JTSHIL[2:NROW(vars)],vars$JTSHIL.1[2:NROW(vars)],
               vars$JTSQUR[2:NROW(vars)],vars$JTSQUR.1[2:NROW(vars)],
               vars$uempmean[2:NROW(vars)],vars$uempmean.1[2:NROW(vars)])
colnames(xregs) <- c("unrate","unrate.1","partrate","partrate.1","hires","hires.1","quits","quits.1","unempdur","unempdur.1")
library(gets)
vac.arx <- arx(vars$JTSJOR[2:NROW(vars)],mc=T,ar=1:5,mxreg=xregs) # mc=T adds an intercept; ar=1:5 adds five lags of the dependent variable

vac.gets <- getsm(vac.arx)
## Searching path no. 1 out of 12
## Searching path no. 2 out of 12
## Searching path no. 3 out of 12
## Searching path no. 4 out of 12
## Searching path no. 5 out of 12
## Searching path no. 6 out of 12
## Searching path no. 7 out of 12
## Searching path no. 8 out of 12
## Searching path no. 9 out of 12
## Searching path no. 10 out of 12
## Searching path no. 11 out of 12
## Searching path no. 12 out of 12
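
Printing the returned object shows which regressors survive the search (a sketch; coef() is assumed to extract the final-model estimates as it does for arx objects):

vac.gets       # summary of the simplified model
coef(vac.gets) # estimates for the retained regressors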

Modelling: Outlier/Break Detection

Exponential Smoothing

ukcpi.ets <- ets(ukcpi.t)
plot(ukcpi.ets)
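
Printing the fitted object shows which exponential-smoothing specification ets() settled on (a sketch):

ukcpi.ets # model label (error/trend/season components), smoothing parameters and AICc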

Time Series Decomposition

plot(ukcpi.t)
lines(ma(ukcpi.t,50),col=2) # 50-month centred moving average

ukcpi.stl <- stl(ukcpi.t,s.window="periodic",t.window=10)
plot(ukcpi.stl)

plot(stl(ukcpi.t,s.window=10,t.window=100))
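
The decomposition's components live in the time.series slot, so they can be inspected or reused directly (a sketch):

head(ukcpi.stl$time.series) # seasonal, trend and remainder columns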

Forecast Function

ukcpi.ets.f <- forecast(ukcpi.ets,h=60)
ukcpi.stl.f <- forecast(ukcpi.stl,h=60)
inf.forc <- forecast(inf.ar1,h=60)
inf.auto.forc <- forecast(inf.auto,h=60)
plot(ukcpi.t,xlim=range(2008,2016),ylim=range(-0.01,0.05))
lines(inf.forc$mean,col=2)
lines(inf.auto.forc$mean,col=3)
lines(ukcpi.ets.f$mean,col=4)
lines(ukcpi.stl.f$mean,col=5)
legend("bottomleft",lty=1,col=c(1,2,3,4,5),legend=c("Data","AR","auto.arima","ETS","STL"),bty="n",ncol=2)

How to train your model

ukcpi.0 <- window(ukcpi.t,end=c(2012,12))
ukcpi.1 <- window(ukcpi.t,start=c(2013,1))

ukcpi.stl.0 <- stl(ukcpi.0,s.window="periodic",t.window=10)
ukcpi.stl.f.0 <- forecast(ukcpi.stl.0,h=24)
ukcpi.ets.0 <- ets(ukcpi.0)
ukcpi.ets.f.0 <- forecast(ukcpi.ets.0,h=24)
inf.ar1.0 <- Arima(ukcpi.0,order=c(2,1,2))
inf.forc.0 <- forecast(inf.ar1.0,h=24)
inf.auto.0 <- auto.arima(ukcpi.0)
inf.auto.forc.0 <- forecast(inf.auto.0,h=24)

plot(ukcpi.t,xlim=range(2008,2015),ylim=range(0,0.05))
lines(inf.forc.0$mean,col=2)
lines(inf.auto.forc.0$mean,col=3)
lines(ukcpi.ets.f.0$mean,col=4)
lines(ukcpi.stl.f.0$mean,col=5)
legend("bottomleft",lty=1,col=c(1,2,3,4,5),legend=c("Data","AR","auto.arima","ETS","STL"),bty="n",ncol=2)

#check accuracy on the 2013-14 hold-out sample via various forecast measures
accuracy(inf.forc.0,ukcpi.1)
##                          ME        RMSE         MAE       MPE     MAPE
## Training set -0.00001795836 0.005052316 0.003364368      -Inf      Inf
## Test set     -0.00810631704 0.010338084 0.008212941 -64.68629 65.05564
##                   MASE        ACF1 Theil's U
## Training set 0.1688247 0.001958033        NA
## Test set     0.4121273 0.804603661  5.543466
accuracy(inf.auto.forc.0,ukcpi.1)
##                          ME        RMSE         MAE       MPE     MAPE
## Training set -0.00002114912 0.004142458 0.002845168      -Inf      Inf
## Test set     -0.01207163377 0.013701909 0.012071634 -87.35295 87.35295
##                   MASE         ACF1 Theil's U
## Training set 0.1427712 -0.003581477        NA
## Test set     0.6057573  0.794546894  6.893837
accuracy(ukcpi.ets.f.0,ukcpi.1)
##                          ME       RMSE         MAE       MPE     MAPE
## Training set -0.00002846032 0.00521723 0.003427318      -Inf      Inf
## Test set     -0.00846351111 0.01081692 0.008565600 -67.47745 67.83284
##                   MASE       ACF1 Theil's U
## Training set 0.1719836 0.06423661        NA
## Test set     0.4298238 0.81043717  5.779589
accuracy(ukcpi.stl.f.0,ukcpi.1)
##                          ME       RMSE         MAE
## Training set -0.00001372806 0.00519169 0.003410987
## Test set     -0.00820352912 0.01049068 0.008312254
##                                   MPE                    MAPE      MASE
## Training set -11360238444364688.00000 11360238444364706.00000 0.1711641
## Test set                    -65.46754                65.84349 0.4171108
##                    ACF1 Theil's U
## Training set 0.06229391        NA
## Test set     0.80201728  5.611123
#make a table to print out in your output
library(knitr) # for kable()
forc.table <- rbind(accuracy(inf.forc.0,ukcpi.1)[2,],
                    accuracy(inf.auto.forc.0,ukcpi.1)[2,],
                    accuracy(ukcpi.ets.f.0,ukcpi.1)[2,],
                    accuracy(ukcpi.stl.f.0,ukcpi.1)[2,])
rownames(forc.table) <- c("ARIMA","auto.arima","ETS","STL")
kable(forc.table)
             ME          RMSE       MAE        MPE        MAPE      MASE       ACF1       Theil’s U
ARIMA        -0.0081063  0.0103381  0.0082129  -64.68629  65.05564  0.4121273  0.8046037  5.543466
auto.arima   -0.0120716  0.0137019  0.0120716  -87.35295  87.35295  0.6057573  0.7945469  6.893837
ETS          -0.0084635  0.0108169  0.0085656  -67.47745  67.83284  0.4298238  0.8104372  5.779590
STL          -0.0082035  0.0104907  0.0083123  -65.46754  65.84349  0.4171108  0.8020173  5.611123

Modelling: Very Long Term Data

gs <- read.csv("ukgs_1700_2016.csv",stringsAsFactors=F)
gr <- read.csv("ukgr_1700_2016.csv",stringsAsFactors=F)
plot(gs$Year,gs$Total.Spending.total.percent.GDP,type="l")
lines(gr$Year,gr$Total.Direct.Revenue.total.percent.GDP,type="l",col=2)
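
A legend along the lines used earlier would distinguish the two series (colours assumed to match the plot call above):

legend("topleft",lty=1,col=c(1,2),legend=c("Spending (% of GDP)","Revenue (% of GDP)"),bty="n")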

Forecast Competition

Forecast Competition League Table

#load data
wk1 <- read.csv("Wk1.csv",stringsAsFactors=F)
wk1$Wk <- 1
wk1$X <- NULL
wk1 <- wk1[!(wk1$Timestamp=="15/01/2015 16:51:56" & wk1$Name=="James Reade"),] # drop a duplicate entry
wk2 <- read.csv("Wk2.csv",stringsAsFactors=F)
wk2$Wk <- 2
wk3 <- read.csv("Wk3.csv",stringsAsFactors=F)
wk3$Wk <- 3
wk4 <- read.csv("Wk4.csv",stringsAsFactors=F)
wk4$Wk <- 4
wk5 <- read.csv("Wk5.csv",stringsAsFactors=F)
wk5$Wk <- 5

#want to combine data so first collect all variable names
total.cols <- union(union(colnames(wk1),colnames(wk2)),union(colnames(wk3),colnames(wk4)))
total.cols <- union(total.cols,colnames(wk5))
#next add empty columns to datasets with variable names
for(i in setdiff(total.cols,colnames(wk1))) {wk1[[i]] <- NA}
for(i in setdiff(total.cols,colnames(wk2))) {wk2[[i]] <- NA}
for(i in setdiff(total.cols,colnames(wk3))) {wk3[[i]] <- NA}
for(i in setdiff(total.cols,colnames(wk4))) {wk4[[i]] <- NA}
for(i in setdiff(total.cols,colnames(wk5))) {wk5[[i]] <- NA}
forcs <- rbind(wk1,wk2,wk3,wk4,wk5)
forcs <- forcs[forcs$Name!="Outcome" & forcs$Name!="",]
forcs$Name <- gsub("Desiati Cosimo","Cosimo Desiati",forcs$Name)

#outcomes
outcomes <- data.frame("temp"=0,stringsAsFactors=F)
for(i in total.cols) {outcomes[[i]] <- NA}
outcomes$temp <- NULL
outcomes$What.value.will.FTSE.close.at.on.Friday.January.16. <- ftse$Close[ftse$Date==as.Date("2015-01-16")]
outcomes$What.value.will.the.FTSE.close.at.on.Friday.January.23. <- ftse$Close[ftse$Date==as.Date("2015-01-23")]
outcomes$What.value.will.the.FTSE.close.at.on.Friday.January.30. <- ftse$Close[ftse$Date==as.Date("2015-01-30")]
outcomes$What.value.will.the.FTSE.close.at.on.Friday.February.6. <- ftse$Close[ftse$Date==as.Date("2015-02-06")]
outcomes$What.value.will.the.FTSE.close.at.on.Friday.February.13. <- ftse$Close[ftse$Date==as.Date("2015-02-13")]
outcomes$What.value.will.the.FTSE.close.at.on.Friday.February.20. <- ftse$Close[ftse$Date==as.Date("2015-02-20")]
# dave09/dave23 hold Google Trends search-volume data read in earlier (not shown here)
outcomes$What.will.be.the.relative.search.volume.for.David.Cameron.in.the.week.commencing.January.11. <- 
  dave09$david.cameron[dave09$Week=="2015-01-11 - 2015-01-17"]
outcomes$What.will.be.the.relative.search.volume.for.David.Cameron.in.the.week.commencing.January.18. <- 
  dave09$david.cameron[dave09$Week=="2015-01-18 - 2015-01-24"]
outcomes$What.will.be.the.relative.search.volume.for.David.Cameron.in.the.week.commencing.January.25. <- 
  dave09$david.cameron[dave09$Week=="2015-01-25 - 2015-01-31"]
outcomes$What.will.be.the.relative.search.volume.for.David.Cameron.in.the.week.commencing.February.1. <- 
  dave09$david.cameron[dave09$Week=="2015-02-01 - 2015-02-07"]
outcomes$What.will.be.the.relative.search.volume.for.David.Cameron.in.the.week.commencing.February.8. <- 
  dave23$david.cameron[dave23$Week=="2015-02-08 - 2015-02-14"]
outcomes$What.will.be.the.relative.search.volume.for.David.Cameron.in.the.week.commencing.February.15. <- 
  dave23$david.cameron[dave23$Week=="2015-02-15 - 2015-02-21"]
gdp09 <- read.csv("gdp09.csv",stringsAsFactors=F)
outcomes$What.will.GDP.growth..QoQ..be.for.2014Q4. <- gdp09$Actual[gdp09$DateTime=="20150127 09:30:00"]
trade09 <- read.csv("trade_090215.csv",stringsAsFactors=F)
outcomes$What.will.the.total.trade.balance.be.for.December.2014. <- 
  trade09$Actual[trade09$DateTime=="20150206 09:30:00"]
mgage09 <- read.csv("mortgage_090215.csv",stringsAsFactors=F)
outcomes$What.will.mortgage.approvals.be.for.December.2014. <- mgage09$Actual[mgage09$DateTime=="20150130 09:30:00"]
outcomes$What.will.the.increase.in.CPI..YoY..be.for.January.2015. <- tail(cpi23,1)$Actual # just the Actual value, not the whole row

#first create forecast absolute % errors
abspc <- data.frame("Wk"=forcs$Wk,"Name"=forcs$Name)
for(i in colnames(outcomes)[grep("What.",colnames(outcomes))]) {
  if(is.na(outcomes[,i])==F) {
    abspc[[i]] <- abs((as.numeric(forcs[,i]) - as.numeric(outcomes[,i]))/as.numeric(outcomes[,i]))
  } else {
    abspc[[i]] <- NA
  }
}
## Warning in if (is.na(outcomes[, i]) == F) {: the condition has length > 1
## and only the first element will be used
## Warning: NAs introduced by coercion
## Warning in as.numeric(forcs[, i]) - as.numeric(outcomes[, i]): longer
## object length is not a multiple of shorter object length
## Warning: NAs introduced by coercion
## Warning in (as.numeric(forcs[, i]) - as.numeric(outcomes[,
## i]))/as.numeric(outcomes[, : longer object length is not a multiple of
## shorter object length
#first metric: APE to 1DP
#first create matrix with column per name and forecasts down columns
ppl <- data.frame("temp"=rep(NA,4*NROW(grep("What.",colnames(abspc)))))
names <- abspc$Name[duplicated(abspc$Name)==F]
for(i in names) {
  person.forcs <- abspc[abspc$Name==i,c(1,grep("What.",colnames(abspc)))]
  full.person.forcs <- c()
  for(j in 1:4) { # weeks 1-4 only; week-5 outcomes presumably not yet realised
    if(j %in% person.forcs$Wk) {
      full.person.forcs <- cbind(full.person.forcs,
                                 t(as.numeric(person.forcs[person.forcs$Wk==j,
                                                           grep("What.",colnames(person.forcs))])))
    } else {
      full.person.forcs <- cbind(full.person.forcs,t(rep(NA,NROW(grep("What.",colnames(person.forcs))))))
    }    
  }
  ppl[[i]] <- t(full.person.forcs)
}
ppl$temp <- NULL
mape <- round(colMeans(ppl,na.rm=T),1)
#tie-breaker: number of forecasts
no.forcs <- colSums(is.na(ppl)==F)
#table
lge.tab <- data.frame("Name"=colnames(ppl),"MAPE"=as.numeric(mape),"No. Forecasts"=as.numeric(no.forcs),
                      "Adjusted"=as.numeric(mape)-as.numeric(no.forcs)/2000)
lge.tab <- lge.tab[order(lge.tab$Adjusted),]
kable(lge.tab[1:10,])
     Name                   MAPE   No..Forecasts   Adjusted
6    James Reade             0.1              39     0.0805
12   Adam Shermon            0.3               5     0.2975
8    Lloyd Morrish-Thomas    0.4              32     0.3840
16   Chris Tucker            0.4               8     0.3960
17   Luke                    0.4               2     0.3990
4    Will Colwell            0.5              15     0.4925
19   Matthew                 0.5               3     0.4985
3    Neil Chandratreya       0.5               2     0.4990
13   Marios H                0.5               2     0.4990
18   JMW                     0.5               2     0.4990