J. James Reade
24-25/02/2015
All files used in this presentation are at: https://www.dropbox.com/sh/82frbc1gid4ffw8/AABNOt4TAS16UfpFaVH3u2vya?dl=0
Mock midterm this week.
library(Quandl)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
Quandl.auth("y8y3ezt48WZeqUqq1yQd")
library(quantmod)
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(zoo)
library(forecast)
## Loading required package: timeDate
## This is forecast 5.8
library(knitr)
options(scipen=13)
setwd("/home/readejj/Dropbox/Teaching/Reading/ec313/2015/Lecture-10")
dave09 <- read.csv("Dave_090215.csv",stringsAsFactors=F)
dave09$Week2 <- as.Date(substr(dave09$Week,1,10))
plot(dave09$Week2,dave09$david.cameron,
main="Google Search Popularity of David Cameron",
ylab="Search popularity",xlab="Date",type="l",ylim=range(0,20))
dave09.ts <- ts(dave09$david.cameron,start=c(2004,1),freq=52)
tsdisplay(dave09.ts)
dave09.arima <- auto.arima(dave09.ts)
dave09.forc <- forecast(dave09.arima,h=8)
plot(dave09.forc,include=100)
dave09.forc$mean
## Time Series:
## Start = c(2015, 8)
## End = c(2015, 15)
## Frequency = 52
## [1] 4.977339 4.967643 4.963494 4.961719 4.960959 4.960634 4.960495 4.960435
dave23 <- read.csv("dave_230215.csv",stringsAsFactors=F)
tail(dave23,2)
## Week david.cameron
## 580 2015-02-08 - 2015-02-14 4
## 581 2015-02-15 - 2015-02-21 4
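A quick check of these forecasts against the realised values (a small sketch using the objects already in memory):
tail(dave23$david.cameron,2)                          # realised search volumes
dave09.forc$mean[1:2]                                 # corresponding forecasts
dave09.forc$mean[1:2] - tail(dave23$david.cameron,2)  # forecast errors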
cpi <- read.csv("cpi_090215.csv",stringsAsFactors=F)
cpi.t <- ts(cpi$Actual[order(cpi$DateTime)],start=c(2007,1),freq=12)
cpi.t.a <- auto.arima(cpi.t)
cpi.t.f <- forecast(cpi.t.a,h=10)
plot(cpi.t.f,main="Forecasts of CPI Inflation",ylab="Inflation (%, YoY)",xlab="Date")
cpi.t.f$mean
## Jan Feb Mar Apr May Jun Jul
## 2015 0.5015949 0.6033607 0.6889686 0.7836768 0.9564424 0.8226495 0.9617996
## Aug Sep Oct
## 2015 1.0601266 1.2608149 1.2895016
cpi23 <- read.csv("cpi_230215.csv",stringsAsFactors=F)
tail(cpi23,1)
## DateTime Actual Consensus Previous
## 97 20070213 09:30:00 2.7 3 NA
getSymbols("GBRCPIALLMINMEI",src="FRED",return.class="zoo") # CPI since 1955 (from FRED)
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbol for more details
## [1] "GBRCPIALLMINMEI"
ukcpi <- as.numeric(coredata(diff(log(GBRCPIALLMINMEI),differences=1,lag=12)))
ukcpi.t <- ts(ukcpi,start=c(1956,1),freq=12)
tsdisplay(ukcpi.t)
tsdisplay(diff(ukcpi.t,differences=1))
inf.ar1 <- Arima(ukcpi.t,order=c(2,1,2))
inf.ar2 <- Arima(ukcpi.t,order=c(3,1,2))
inf.ar3 <- Arima(ukcpi.t,order=c(1,1,2))
inf.ar4 <- Arima(ukcpi.t,order=c(2,1,3))
inf.ar5 <- Arima(ukcpi.t,order=c(2,1,1))
aiccs <- c(inf.ar1$aicc,inf.ar2$aicc,inf.ar3$aicc,inf.ar4$aicc,inf.ar5$aicc)
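The AICc values can be compared directly; the smallest indicates the preferred specification:
aiccs
which.min(aiccs)  # position of the lowest AICc among the five candidates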
inf.auto <- auto.arima(ukcpi.t)
inf.forc <- forecast(inf.ar1,h=60)
inf.auto.forc <- forecast(inf.auto,h=60)
plot(ukcpi.t,xlim=range(2008,2020),ylim=range(0,0.05),main="UK CPI Inflation",ylab="%",xlab="Date")
lines(inf.forc$mean,col="blue")
lines(inf.auto.forc$mean,col="red")
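A legend makes the two forecasts easier to tell apart (a small addition, in the style of the later plots):
legend("topright",lty=1,col=c("black","blue","red"),legend=c("Data","ARIMA(2,1,2)","auto.arima"),bty="n")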
ftse <- Quandl("YAHOO/INDEX_FTSE")
ftse <- ftse[order(ftse$Date),]
ftse.0 <- ftse[ftse$Date<as.Date("2015-02-13"),]
ftse.auto <- auto.arima(ftse.0$Close)  # fit on the pre-13-Feb sample so the forecasts can be checked
ftse.forc <- forecast(ftse.auto,h=50)
plot(ftse.forc,include=100)
ftse.forc$mean[seq(1,35,5)]
## [1] 6948.161 6949.152 6952.735 6956.367 6960.000 6963.632 6967.264
ftse[ftse$Date==as.Date("2015-02-13") | ftse$Date==as.Date("2015-02-20"),]
## Date Open High Low Close Volume Adjusted Close
## 8 2015-02-13 6828.1 6887.6 6828.1 6873.5 0 6873.5
## 3 2015-02-20 6888.9 6920.5 6884.8 6915.2 764028800 6915.2
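As a rough check (a sketch, assuming the first forecast lines up with the 13 February trading day), the absolute percent error against the realised close:
actual.13 <- ftse$Close[ftse$Date==as.Date("2015-02-13")]
100*abs(ftse.forc$mean[1] - actual.13)/actual.13  # absolute percent error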
getSymbols("YHOO",src="google",return.class="zoo") # from google finance
## [1] "YHOO"
plot(YHOO)
getSymbols("GOOG",src="yahoo",return.class="zoo") # from yahoo finance
## [1] "GOOG"
plot(GOOG)
getSymbols("DEXJPUS",src="FRED",return.class="zoo") # FX rates from FRED
## [1] "DEXJPUS"
plot(DEXJPUS)
read.csv() is the easiest way to import data (RStudio's Import Dataset button generates the same call); remember stringsAsFactors=F. The UK government spending figures are expressed as a percent of GDP and run from 1700 to 2016.
gs <- read.csv("ukgs_1700_2016.csv",stringsAsFactors=F)
head(gs)
## Year GDP.UK...163..billion Population.UK.million
## 1 1700 0.06058 9.100
## 2 1701 0.0608 9.129
## 3 1702 0.06102 9.158
## 4 1703 0.06124 9.188
## 5 1704 0.06146 9.217
## 6 1705 0.06168 9.247
## Total.Spending.total.percent.GDP X
## 1 5.28 a
## 2 5.66 a
## 3 8.21 a
## 4 8.68 a
## 5 8.99 a
## 6 9.52 a
str(gs)
## 'data.frame': 317 obs. of 5 variables:
## $ Year : int 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 ...
## $ GDP.UK...163..billion : chr "0.06058" "0.0608" "0.06102" "0.06124" ...
## $ Population.UK.million : num 9.1 9.13 9.16 9.19 9.22 ...
## $ Total.Spending.total.percent.GDP: num 5.28 5.66 8.21 8.68 8.99 ...
## $ X : chr "a" "a" "a" "a" ...
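Note from str() that the GDP column has been read in as character (a stray non-numeric entry in the file will do this); convert it before calculating with it:
gs$GDP.UK...163..billion <- as.numeric(gs$GDP.UK...163..billion)  # may warn if any entries become NA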
Set type to "l" for lines, "p" for points, or "o" for both; main = "Title of plot" adds a title, and xlab/ylab set the axis labels.
plot(gs$Year,gs$Total.Spending.total.percent.GDP,type="l")
getSymbols('JTSJOR',src='FRED',return.class="zoo")
## [1] "JTSJOR"
getSymbols('UNRATE',src='FRED',return.class="zoo")
## [1] "UNRATE"
getSymbols('CPIAUCSL',src='FRED',return.class="zoo")
## [1] "CPIAUCSL"
plot(UNRATE,JTSJOR)
infl <- diff(log(CPIAUCSL),differences=1,lag=12)
plot(UNRATE,infl,main="US Phillips Curve?",ylab="Inflation",xlab="Unemployment Rate")
getSymbols('JPNURHARMMDSMEI',src='FRED',return.class="zoo")
## [1] "JPNURHARMMDSMEI"
getSymbols('JPNCPIALLMINMEI',src='FRED',return.class="zoo")
## [1] "JPNCPIALLMINMEI"
infl.j <- diff(log(JPNCPIALLMINMEI),differences=1,lag=12)
plot(JPNURHARMMDSMEI,infl.j,main="Japan's Phillips Curve",ylab="Inflation (CPI)",xlab="Unemployment",pch=16)
plot(-JPNURHARMMDSMEI,infl.j,main="Japan's Phillips Curve",ylab="Inflation (CPI)",xlab="-Unemployment",pch=16)
plot(-JPNURHARMMDSMEI[index(JPNURHARMMDSMEI)>=as.Date("1980-01-01") & index(JPNURHARMMDSMEI)<=as.Date("2005-08-01")],
infl.j[index(infl.j)>=as.Date("1980-01-01") & index(infl.j)<=as.Date("2005-08-01")],main="Japan's Phillips Curve",ylab="Inflation (CPI)",xlab="-Unemployment",pch=16)
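To put a number on the (inverse) relationship, one could merge the two series on their common dates and compute the correlation; a small sketch:
jp <- merge(JPNURHARMMDSMEI, infl.j, all=FALSE)  # keep only overlapping dates
cor(coredata(jp)[,1], coredata(jp)[,2], use="complete.obs")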
hist() is the main command for plotting distributions. The breaks option can be used to create more bars, which can be more informative:
hist(infl.j)
hist(infl.j,breaks=50)
Regressions are estimated with lm(), using R's formula syntax:
lm(dep.var ~ ind.var.1 + ind.var.2 + ..., data=dataset)
The gets package will be used later for model selection:
install.packages("gets")
library(gets)
Further US labour-market series from FRED:
getSymbols('JTSQUR',src='FRED',return.class="zoo") # quits
## [1] "JTSQUR"
getSymbols('JTSHIL',src='FRED',return.class="zoo") #total hires
## [1] "JTSHIL"
getSymbols('LNU01300000',src='FRED',return.class="zoo") #participation rate
## [1] "LNU01300000"
getSymbols('UEMPMEAN',src='FRED',return.class="zoo") #average duration of unemployment
## [1] "UEMPMEAN"
plot(JTSJOR,main="Vacancy Rate through time",ylab="%",xlab="Date")
Current and lag of every variable:
partrate <- window(LNU01300000, start="2000-12-01",end="2014-12-01")
uempmean <- window(UEMPMEAN, start="2000-12-01",end="2014-12-01")
unrate <- window(UNRATE, start="2000-12-01",end="2014-12-01")
vars <- data.frame(partrate,uempmean,unrate,JTSHIL,JTSJOR,JTSQUR,
"partrate.1"=lag(partrate,-1,na.pad=TRUE),
"uempmean.1"=lag(uempmean,-1,na.pad=TRUE),
"unrate.1"=lag(unrate,-1,na.pad=TRUE),
"JTSHIL.1"=lag(JTSHIL,-1,na.pad=TRUE),
"JTSQUR.1"=lag(JTSQUR,-1,na.pad=TRUE),
"JTSJOR.1"=lag(JTSJOR,-1,na.pad=TRUE))
vac.mod <- lm(JTSJOR ~ JTSJOR.1 + unrate + unrate.1 + partrate + partrate.1 +
JTSHIL + JTSHIL.1 + JTSQUR + JTSQUR.1 + uempmean + uempmean.1, data=vars)
summary(vac.mod)
##
## Call:
## lm(formula = JTSJOR ~ JTSJOR.1 + unrate + unrate.1 + partrate +
## partrate.1 + JTSHIL + JTSHIL.1 + JTSQUR + JTSQUR.1 + uempmean +
## uempmean.1, data = vars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37444 -0.06651 0.00058 0.07390 0.34219
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.09266489 1.34423754 1.557 0.1216
## JTSJOR.1 0.58912942 0.05940339 9.917 <0.0000000000000002 ***
## unrate 0.03834411 0.06948536 0.552 0.5819
## unrate.1 -0.06202105 0.06767094 -0.917 0.3608
## partrate -0.01055820 0.03290480 -0.321 0.7487
## partrate.1 -0.02959515 0.03351824 -0.883 0.3786
## JTSHIL 0.00023295 0.00007807 2.984 0.0033 **
## JTSHIL.1 0.00002741 0.00008522 0.322 0.7481
## JTSQUR -0.10079557 0.15295015 -0.659 0.5109
## JTSQUR.1 0.29100532 0.14559861 1.999 0.0474 *
## uempmean 0.00405668 0.01464278 0.277 0.7821
## uempmean.1 0.00442336 0.01385946 0.319 0.7500
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1199 on 156 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.9249, Adjusted R-squared: 0.9196
## F-statistic: 174.6 on 11 and 156 DF, p-value: < 0.00000000000000022
Stepwise selection:
vac.step <- step(vac.mod)
## Start: AIC=-701.18
## JTSJOR ~ JTSJOR.1 + unrate + unrate.1 + partrate + partrate.1 +
## JTSHIL + JTSHIL.1 + JTSQUR + JTSQUR.1 + uempmean + uempmean.1
##
## Df Sum of Sq RSS AIC
## - uempmean 1 0.00110 2.2433 -703.09
## - uempmean.1 1 0.00146 2.2436 -703.07
## - partrate 1 0.00148 2.2436 -703.06
## - JTSHIL.1 1 0.00149 2.2437 -703.06
## - unrate 1 0.00438 2.2465 -702.85
## - JTSQUR 1 0.00624 2.2484 -702.71
## - partrate.1 1 0.01121 2.2534 -702.34
## - unrate.1 1 0.01207 2.2542 -702.27
## <none> 2.2422 -701.18
## - JTSQUR.1 1 0.05742 2.2996 -698.93
## - JTSHIL 1 0.12798 2.3701 -693.85
## - JTSJOR.1 1 1.41365 3.6558 -621.04
##
## Step: AIC=-703.09
## JTSJOR ~ JTSJOR.1 + unrate + unrate.1 + partrate + partrate.1 +
## JTSHIL + JTSHIL.1 + JTSQUR + JTSQUR.1 + uempmean.1
##
## Df Sum of Sq RSS AIC
## - JTSHIL.1 1 0.00114 2.2444 -705.01
## - partrate 1 0.00134 2.2446 -704.99
## - unrate 1 0.00397 2.2472 -704.80
## - JTSQUR 1 0.00590 2.2492 -704.65
## - unrate.1 1 0.01112 2.2544 -704.26
## - partrate.1 1 0.01253 2.2558 -704.16
## <none> 2.2433 -703.09
## - JTSQUR.1 1 0.05686 2.3001 -700.89
## - uempmean.1 1 0.06173 2.3050 -700.53
## - JTSHIL 1 0.13201 2.3753 -695.49
## - JTSJOR.1 1 1.42575 3.6690 -622.44
##
## Step: AIC=-705.01
## JTSJOR ~ JTSJOR.1 + unrate + unrate.1 + partrate + partrate.1 +
## JTSHIL + JTSQUR + JTSQUR.1 + uempmean.1
##
## Df Sum of Sq RSS AIC
## - partrate 1 0.00124 2.2456 -706.92
## - unrate 1 0.00327 2.2477 -706.76
## - JTSQUR 1 0.00591 2.2503 -706.57
## - unrate.1 1 0.01037 2.2548 -706.23
## - partrate.1 1 0.01229 2.2567 -706.09
## <none> 2.2444 -705.01
## - uempmean.1 1 0.06657 2.3110 -702.10
## - JTSQUR.1 1 0.08556 2.3300 -700.72
## - JTSHIL 1 0.14729 2.3917 -696.33
## - JTSJOR.1 1 1.44845 3.6929 -623.35
##
## Step: AIC=-706.92
## JTSJOR ~ JTSJOR.1 + unrate + unrate.1 + partrate.1 + JTSHIL +
## JTSQUR + JTSQUR.1 + uempmean.1
##
## Df Sum of Sq RSS AIC
## - unrate 1 0.00269 2.2483 -708.71
## - JTSQUR 1 0.00569 2.2513 -708.49
## - unrate.1 1 0.00957 2.2552 -708.20
## <none> 2.2456 -706.92
## - partrate.1 1 0.05835 2.3040 -704.61
## - uempmean.1 1 0.07168 2.3173 -703.64
## - JTSQUR.1 1 0.08433 2.3300 -702.72
## - JTSHIL 1 0.14608 2.3917 -698.33
## - JTSJOR.1 1 1.46549 3.7111 -624.52
##
## Step: AIC=-708.71
## JTSJOR ~ JTSJOR.1 + unrate.1 + partrate.1 + JTSHIL + JTSQUR +
## JTSQUR.1 + uempmean.1
##
## Df Sum of Sq RSS AIC
## - JTSQUR 1 0.00620 2.2545 -710.25
## - unrate.1 1 0.01965 2.2680 -709.25
## <none> 2.2483 -708.71
## - partrate.1 1 0.05791 2.3062 -706.44
## - uempmean.1 1 0.06900 2.3173 -705.64
## - JTSQUR.1 1 0.08172 2.3301 -704.72
## - JTSHIL 1 0.14465 2.3930 -700.24
## - JTSJOR.1 1 1.48892 3.7373 -625.34
##
## Step: AIC=-710.25
## JTSJOR ~ JTSJOR.1 + unrate.1 + partrate.1 + JTSHIL + JTSQUR.1 +
## uempmean.1
##
## Df Sum of Sq RSS AIC
## - unrate.1 1 0.02018 2.2747 -710.75
## <none> 2.2545 -710.25
## - partrate.1 1 0.06166 2.3162 -707.72
## - JTSQUR.1 1 0.07682 2.3314 -706.62
## - uempmean.1 1 0.07834 2.3329 -706.51
## - JTSHIL 1 0.18045 2.4350 -699.32
## - JTSJOR.1 1 1.52533 3.7799 -625.44
##
## Step: AIC=-710.75
## JTSJOR ~ JTSJOR.1 + partrate.1 + JTSHIL + JTSQUR.1 + uempmean.1
##
## Df Sum of Sq RSS AIC
## <none> 2.2747 -710.75
## - uempmean.1 1 0.05937 2.3341 -708.43
## - partrate.1 1 0.09332 2.3680 -706.00
## - JTSQUR.1 1 0.11567 2.3904 -704.42
## - JTSHIL 1 0.25766 2.5324 -694.73
## - JTSJOR.1 1 1.97031 4.2450 -607.94
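To inspect the specification that step() settles on (output omitted):
summary(vac.step)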
Gets selection:
xregs <- cbind(vars$unrate[2:NROW(vars)],vars$unrate.1[2:NROW(vars)],
vars$partrate[2:NROW(vars)],vars$partrate.1[2:NROW(vars)],
vars$JTSHIL[2:NROW(vars)],vars$JTSHIL.1[2:NROW(vars)],
vars$JTSQUR[2:NROW(vars)],vars$JTSQUR.1[2:NROW(vars)],
vars$uempmean[2:NROW(vars)],vars$uempmean.1[2:NROW(vars)])
colnames(xregs) <- c("unrate","unrate.1","partrate","partrate.1","hires","hires.1","quits","quits.1","unempdur","unempdur.1")
library(gets)
vac.arx <- arx(vars$JTSJOR[2:NROW(vars)],mc=T,ar=1:5,mxreg = xregs)
vac.gets <- getsm(vac.arx)
## Searching path no. 1 out of 12
## Searching path no. 2 out of 12
## Searching path no. 3 out of 12
## Searching path no. 4 out of 12
## Searching path no. 5 out of 12
## Searching path no. 6 out of 12
## Searching path no. 7 out of 12
## Searching path no. 8 out of 12
## Searching path no. 9 out of 12
## Searching path no. 10 out of 12
## Searching path no. 11 out of 12
## Searching path no. 12 out of 12
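The selected model can then be printed for comparison with the stepwise choice (output omitted):
vac.gets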
(The strucchange and gets packages offer further selection tools.) For exponential smoothing, the ets() function in R does everything:
ukcpi.ets <- ets(ukcpi.t)
plot(ukcpi.ets)
Recall the decomposition model (recap at https://www.otexts.org/fpp/6): \[ y_{t} = S_{t} + T_{t} + E_{t}, \] where \(S_t\) is the seasonal component, \(T_t\) the trend-cycle and \(E_t\) the remainder.
Decomposition is carried out by stl(). For the constituent parts of the classical method, ma() computes the moving average:
plot(ukcpi.t)
lines(ma(ukcpi.t,50),col=2)
ukcpi.stl <- stl(ukcpi.t,s.window="periodic",t.window=10)
plot(ukcpi.stl)
plot(stl(ukcpi.t,s.window=10,t.window=100))
The forecast() function works well with standard models:
- the Arima() function,
- the stl() function,
- the ets() function,
- and with the plot() function for displaying the results.

forecast() returns a list with various useful items:
- $mean contains the actual forecast values.
- $upper and $lower contain the upper and lower prediction-interval bounds (80% and 95%).

ukcpi.ets.f <- forecast(ukcpi.ets,h=60)
ukcpi.stl.f <- forecast(ukcpi.stl,h=60)
inf.forc <- forecast(inf.ar1,h=60)
inf.auto.forc <- forecast(inf.auto,h=60)
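As an illustration of that list structure, the first rows of the ETS interval bounds:
head(ukcpi.ets.f$upper)  # 80% and 95% upper bounds
head(ukcpi.ets.f$lower)  # 80% and 95% lower bounds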
plot(ukcpi.t,xlim=range(2008,2016),ylim=range(-0.01,0.05))
lines(inf.forc$mean,col=2)
lines(inf.auto.forc$mean,col=3)
lines(ukcpi.ets.f$mean,col=4)
lines(ukcpi.stl.f$mean,col=5)
legend("bottomleft",lty=1,col=c(1,2,3,4,5),legend=c("Data","AR","auto.arima","ETS","STL"),bty="n",ncol=2)
ukcpi.0 <- window(ukcpi.t,end=c(2012,12))
ukcpi.1 <- window(ukcpi.t,start=c(2013,1))
ukcpi.stl.0 <- stl(ukcpi.0,s.window="periodic",t.window=10)
ukcpi.stl.f.0 <- forecast(ukcpi.stl.0,h=24)
ukcpi.ets.0 <- ets(ukcpi.0)
ukcpi.ets.f.0 <- forecast(ukcpi.ets.0,h=24)
inf.ar1.0 <- Arima(ukcpi.0,order=c(2,1,2))
inf.forc.0 <- forecast(inf.ar1.0,h=24)
inf.auto.0 <- auto.arima(ukcpi.0)
inf.auto.forc.0 <- forecast(inf.auto.0,h=24)
plot(ukcpi.t,xlim=range(2008,2015),ylim=range(0,0.05))
lines(inf.forc.0$mean,col=2)
lines(inf.auto.forc.0$mean,col=3)
lines(ukcpi.ets.f.0$mean,col=4)
lines(ukcpi.stl.f.0$mean,col=5)
legend("bottomleft",lty=1,col=c(1,2,3,4,5),legend=c("Data","AR","auto.arima","ETS","STL"),bty="n",ncol=2)
#check accuracy via various forecast measures
accuracy(inf.forc.0,ukcpi.1)
## ME RMSE MAE MPE MAPE
## Training set -0.00001795836 0.005052316 0.003364368 -Inf Inf
## Test set -0.00810631704 0.010338084 0.008212941 -64.68629 65.05564
## MASE ACF1 Theil's U
## Training set 0.1688247 0.001958033 NA
## Test set 0.4121273 0.804603661 5.543466
accuracy(inf.auto.forc.0,ukcpi.1)
## ME RMSE MAE MPE MAPE
## Training set -0.00002114912 0.004142458 0.002845168 -Inf Inf
## Test set -0.01207163377 0.013701909 0.012071634 -87.35295 87.35295
## MASE ACF1 Theil's U
## Training set 0.1427712 -0.003581477 NA
## Test set 0.6057573 0.794546894 6.893837
accuracy(ukcpi.ets.f.0,ukcpi.1)
## ME RMSE MAE MPE MAPE
## Training set -0.00002846032 0.00521723 0.003427318 -Inf Inf
## Test set -0.00846351111 0.01081692 0.008565600 -67.47745 67.83284
## MASE ACF1 Theil's U
## Training set 0.1719836 0.06423661 NA
## Test set 0.4298238 0.81043717 5.779589
accuracy(ukcpi.stl.f.0,ukcpi.1)
## ME RMSE MAE
## Training set -0.00001372806 0.00519169 0.003410987
## Test set -0.00820352912 0.01049068 0.008312254
## MPE MAPE MASE
## Training set -11360238444364688.00000 11360238444364706.00000 0.1711641
## Test set -65.46754 65.84349 0.4171108
## ACF1 Theil's U
## Training set 0.06229391 NA
## Test set 0.80201728 5.611123
#make table to print out in your output
forc.table <- rbind(accuracy(inf.forc.0,ukcpi.1)[2,],
accuracy(inf.auto.forc.0,ukcpi.1)[2,],
accuracy(ukcpi.ets.f.0,ukcpi.1)[2,],
accuracy(ukcpi.stl.f.0,ukcpi.1)[2,])
rownames(forc.table) <- c("ARIMA","auto.arima","ETS","STL")
kable(forc.table)
| | ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U |
|---|---|---|---|---|---|---|---|---|
| ARIMA | -0.0081063 | 0.0103381 | 0.0082129 | -64.68629 | 65.05564 | 0.4121273 | 0.8046037 | 5.543466 |
| auto.arima | -0.0120716 | 0.0137019 | 0.0120716 | -87.35295 | 87.35295 | 0.6057573 | 0.7945469 | 6.893837 |
| ETS | -0.0084635 | 0.0108169 | 0.0085656 | -67.47745 | 67.83284 | 0.4298238 | 0.8104372 | 5.779590 |
| STL | -0.0082035 | 0.0104907 | 0.0083123 | -65.46754 | 65.84349 | 0.4171108 | 0.8020173 | 5.611123 |
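The table can also be queried programmatically, e.g. to pick out the model with the lowest RMSE:
rownames(forc.table)[which.min(forc.table[,"RMSE"])]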
gs <- read.csv("ukgs_1700_2016.csv",stringsAsFactors=F)
gr <- read.csv("ukgr_1700_2016.csv",stringsAsFactors=F)
plot(gs$Year,gs$Total.Spending.total.percent.GDP,type="l")
lines(gr$Year,gr$Total.Direct.Revenue.total.percent.GDP,type="l",col=2)
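A legend distinguishes spending from revenue (a small addition, in the style of the earlier plots):
legend("topleft",lty=1,col=c(1,2),legend=c("Spending","Revenue"),bty="n")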
#load data
wk1 <- read.csv("Wk1.csv",stringsAsFactors=F)
wk1$Wk <- 1
wk1$X <- NULL
wk1 <- wk1[!(wk1$Timestamp=="15/01/2015 16:51:56" & wk1$Name=="James Reade"),]
wk2 <- read.csv("Wk2.csv",stringsAsFactors=F)
wk2$Wk <- 2
wk3 <- read.csv("Wk3.csv",stringsAsFactors=F)
wk3$Wk <- 3
wk4 <- read.csv("Wk4.csv",stringsAsFactors=F)
wk4$Wk <- 4
wk5 <- read.csv("Wk5.csv",stringsAsFactors=F)
wk5$Wk <- 5
#want to combine data so first collect all variable names
total.cols <- union(union(colnames(wk1),colnames(wk2)),union(colnames(wk3),colnames(wk4)))
total.cols <- union(total.cols,colnames(wk5))
#next add empty columns to datasets with variable names
for(i in setdiff(total.cols,colnames(wk1))) {wk1[[i]] <- NA}
for(i in setdiff(total.cols,colnames(wk2))) {wk2[[i]] <- NA}
for(i in setdiff(total.cols,colnames(wk3))) {wk3[[i]] <- NA}
for(i in setdiff(total.cols,colnames(wk4))) {wk4[[i]] <- NA}
for(i in setdiff(total.cols,colnames(wk5))) {wk5[[i]] <- NA}
forcs <- rbind(wk1,wk2,wk3,wk4,wk5)
forcs <- forcs[forcs$Name!="Outcome" & forcs$Name!="",]
forcs$Name <- gsub("Desiati Cosimo","Cosimo Desiati",forcs$Name)
#outcomes
outcomes <- data.frame("temp"=0,stringsAsFactors=F)
for(i in total.cols) {outcomes[[i]] <- NA}
outcomes$temp <- NULL
outcomes$What.value.will.FTSE.close.at.on.Friday.January.16. <- ftse$Close[ftse$Date==as.Date("2015-01-16")]
outcomes$What.value.will.the.FTSE.close.at.on.Friday.January.23. <- ftse$Close[ftse$Date==as.Date("2015-01-23")]
outcomes$What.value.will.the.FTSE.close.at.on.Friday.January.30. <- ftse$Close[ftse$Date==as.Date("2015-01-30")]
outcomes$What.value.will.the.FTSE.close.at.on.Friday.February.6. <- ftse$Close[ftse$Date==as.Date("2015-02-06")]
outcomes$What.value.will.the.FTSE.close.at.on.Friday.February.13. <- ftse$Close[ftse$Date==as.Date("2015-02-13")]
outcomes$What.value.will.the.FTSE.close.at.on.Friday.February.20. <- ftse$Close[ftse$Date==as.Date("2015-02-20")]
outcomes$What.will.be.the.relative.search.volume.for.David.Cameron.in.the.week.commencing.January.11. <-
dave09$david.cameron[dave09$Week=="2015-01-11 - 2015-01-17"]
outcomes$What.will.be.the.relative.search.volume.for.David.Cameron.in.the.week.commencing.January.18. <-
dave09$david.cameron[dave09$Week=="2015-01-18 - 2015-01-24"]
outcomes$What.will.be.the.relative.search.volume.for.David.Cameron.in.the.week.commencing.January.25. <-
dave09$david.cameron[dave09$Week=="2015-01-25 - 2015-01-31"]
outcomes$What.will.be.the.relative.search.volume.for.David.Cameron.in.the.week.commencing.February.1. <-
dave09$david.cameron[dave09$Week=="2015-02-01 - 2015-02-07"]
outcomes$What.will.be.the.relative.search.volume.for.David.Cameron.in.the.week.commencing.February.8. <-
dave23$david.cameron[dave23$Week=="2015-02-08 - 2015-02-14"]
outcomes$What.will.be.the.relative.search.volume.for.David.Cameron.in.the.week.commencing.February.15. <-
dave23$david.cameron[dave23$Week=="2015-02-15 - 2015-02-21"]
gdp09 <- read.csv("gdp09.csv",stringsAsFactors=F)
outcomes$What.will.GDP.growth..QoQ..be.for.2014Q4. <- gdp09$Actual[gdp09$DateTime=="20150127 09:30:00"]
trade09 <- read.csv("trade_090215.csv",stringsAsFactors=F)
outcomes$What.will.the.total.trade.balance.be.for.December.2014. <-
trade09$Actual[trade09$DateTime=="20150206 09:30:00"]
mgage09 <- read.csv("mortgage_090215.csv",stringsAsFactors=F)
outcomes$What.will.mortgage.approvals.be.for.December.2014. <- mgage09$Actual[mgage09$DateTime=="20150130 09:30:00"]
outcomes$What.will.the.increase.in.CPI..YoY..be.for.January.2015. <- tail(cpi23,1)$Actual
#first create forecast absolute % errors
abspc <- data.frame("Wk"=forcs$Wk,"Name"=forcs$Name)
for(i in colnames(outcomes)[grep("What.",colnames(outcomes))]) {
  if(!is.na(outcomes[[i]])) {
    abspc[[i]] <- abs((as.numeric(forcs[,i]) - as.numeric(outcomes[,i]))/as.numeric(outcomes[,i]))
  } else {
    abspc[[i]] <- NA
  }
}
## Warning: NAs introduced by coercion
#first metric: APE to 1DP
#first create matrix with column per name and forecasts down columns
ppl <- data.frame("temp"=rep(NA,4*NROW(grep("What.",colnames(abspc)))))
names <- abspc$Name[duplicated(abspc$Name)==F]
for(i in names) {
person.forcs <- abspc[abspc$Name==i,c(1,grep("What.",colnames(abspc)))]
full.person.forcs <- c()
for(j in 1:4) {
if(j %in% person.forcs$Wk) {
full.person.forcs <- cbind(full.person.forcs,
t(as.numeric(person.forcs[person.forcs$Wk==j,
grep("What.",colnames(person.forcs))])))
} else {
full.person.forcs <- cbind(full.person.forcs,t(rep(NA,NROW(grep("What.",colnames(person.forcs))))))
}
}
ppl[[i]] <- t(full.person.forcs)
}
ppl$temp <- NULL
mape <- round(colMeans(ppl,na.rm=T),1)
#tie-breaker: number of forecasts
no.forcs <- colSums(is.na(ppl)==F)
#table
lge.tab <- data.frame("Name"=colnames(ppl),"MAPE"=as.numeric(mape),"No. Forecasts"=as.numeric(no.forcs),
"Adjusted"=as.numeric(mape)-as.numeric(no.forcs)/2000)
lge.tab <- lge.tab[order(lge.tab$Adjusted),]
kable(lge.tab[1:10,])
| | Name | MAPE | No..Forecasts | Adjusted |
|---|---|---|---|---|
| 6 | James Reade | 0.1 | 39 | 0.0805 |
| 12 | Adam Shermon | 0.3 | 5 | 0.2975 |
| 8 | Lloyd Morrish-Thomas | 0.4 | 32 | 0.3840 |
| 16 | Chris Tucker | 0.4 | 8 | 0.3960 |
| 17 | Luke | 0.4 | 2 | 0.3990 |
| 4 | Will Colwell | 0.5 | 15 | 0.4925 |
| 19 | Matthew | 0.5 | 3 | 0.4985 |
| 3 | Neil Chandratreya | 0.5 | 2 | 0.4990 |
| 13 | Marios H | 0.5 | 2 | 0.4990 |
| 18 | JMW | 0.5 | 2 | 0.4990 |