EC313 Business and Financial Forecasting Lecture 5

J James Reade

26/01/2015

Introduction

\[ \newcommand{\N}[1]{\mathsf{N}\left(#1\right)} \newcommand{\E}[1]{\mathsf{E}\left(#1\right)} \newcommand{\V}[1]{\mathsf{Var}\left(#1\right)} \newcommand{\ols}[1]{\widehat{#1}} \newcommand{\cond}[1]{\left|#1\right.} \newcommand{\abs}[1]{\left|#1\right|} \]

GDP Forecasting…

## [1] 0.7583128

Dave last week

dave19 <- read.csv("Dave_190115.csv",stringsAsFactors=F)
dave19$Week <- as.Date(substr(dave19$Week,1,10))
plot(dave19$Week,dave19$david.cameron,
     main="Google Search Popularity of David Cameron",
     ylab="Search popularity",xlab="Date",type="l",ylim=range(0,20))

dave19.ts <- ts(dave19$david.cameron,start=c(2004,1),freq=52)
tsdisplay(dave19.ts)

dave19.arima <- auto.arima(dave19.ts)
dave19.forc <- forecast(dave19.arima,h=10)
plot(dave19.forc,include=100)

dave19.forc$mean
## Time Series:
## Start = c(2015, 5) 
## End = c(2015, 14) 
## Frequency = 52 
##  [1] 6.750417 5.771330 5.345202 5.159738 5.079018 5.043886 5.028596
##  [8] 5.021941 5.019045 5.017784
dave27 <- read.csv("Dave_270115.csv",stringsAsFactors=F)
tail(dave27,1)
##                        Week david.cameron
## 578 2015-01-25 - 2015-01-31             5

FTSE last week

ftse <- Quandl("YAHOO/INDEX_FTSE")
ftse <- ftse[order(ftse$Date),]
ftse.0 <- ftse[ftse$Date<as.Date("2015-01-23"),]
ftse.auto <- auto.arima(ftse$Close)
ftse.forc <- forecast(ftse.auto,h=50)
plot(ftse.forc,include=100)

ftse.forc$mean[seq(1,50,5)]
##  [1] 6848.440 6848.023 6851.560 6855.116 6858.673 6862.229 6865.786
##  [8] 6869.342 6872.899 6876.455
ftse[ftse$Date==as.Date("2015-01-23"),]
##         Date   Open   High    Low  Close    Volume Adjusted Close
## 2 2015-01-23 6796.6 6841.7 6796.6 6832.8 839368300         6832.8

Next week…

mgage <- read.csv("/home/readejj/Dropbox/Teaching/Reading/ec313/2015/Lecture-5/mortgages_270115.csv",stringsAsFactors=F)
mgage$DateTime <- as.Date(substr(mgage$DateTime,1,8),"%Y%m%d")
mgage$Consensus[mgage$DateTime=="2014-05-01"] <- mgage$Consensus[mgage$DateTime=="2014-05-01"]/1000
plot(mgage$DateTime,mgage$Actual,type="l",main="Mortgage Approvals since 2007",ylab="Number of Approvals",xlab="Date")
lines(mgage$DateTime,mgage$Consensus,type="l",col=2)
legend("topright",lty=1,col=c(1,2),legend=c("Actual","Consensus"))

Another one that’s tricky to call…

elec <- data.frame()
sheets <- c("43-45","45-50","50-51","51-55","55-59","59-64","64-66","66-70","70-74",
            "74","74-79","79-83","83-87","87-92","92-97","97-01","01-05","05-10","10-")
for( i in 1:NROW(sheets)) {
  temp <- read.xlsx("/home/readejj/Dropbox/Research/Misc/LVW Projects/UK Elections/data/Mark-Packs-opinion-polls-spreadsheet.xlsx",sheet=sheets[i])
  if(NROW(grep("X",colnames(temp)))>0) {
    delete.rows <- grep("X",colnames(temp))
    for(j in NROW(delete.rows):1){
      temp[delete.rows[j]] <- NULL
    }
  }
  if(i>1){
    if(NROW(setdiff(colnames(elec),colnames(temp)))>0){
      extra.rows <- setdiff(colnames(elec),colnames(temp))
      for(j in 1:NROW(extra.rows)){
        temp[[extra.rows[j]]] <- NA
      }
    }
    if(NROW(setdiff(colnames(temp),colnames(elec)))>0){
      extra.rows <- setdiff(colnames(temp),colnames(elec))
      for(j in 1:NROW(extra.rows)){
        elec[[extra.rows[j]]] <- NA
      }    
    }
  }
  elec <- rbind(elec,temp)
}
#next to tidy the resulting file up
elec$Year <- na.locf(elec$Year)
elec$Month <- na.locf(elec$Month)
elec$f.date <- as.Date(paste("01",tolower(substr(elec$Month,1,3)),elec$Year,sep="/"),
                       "%d/%b/%Y")

#create results file
elec.results <- elec[elec$Polling=="Result" | elec$Polling=="General election",]
elec.results <- elec.results[is.na(elec.results$Year)==F,c("Year","f.date","Con","Lab",
                                                           "LD","SDP","Green","Referendum")]
elec.results <- elec.results[duplicated(elec.results)==F,]
elec.results <- elec.results[elec.results$Year>1,]

#create monthly averages of opinion polls
elec.monthly <- aggregate(elec[,c("Con","Lab","LD")],by=list(elec$f.date),FUN=mean,na.rm=T)
colnames(elec.monthly) <- gsub("Group.1","date",colnames(elec.monthly))
elec.monthly <- elec.monthly[as.numeric(format(elec.monthly$date,"%Y"))>1940,]

#load up file from wikipedia of election information (e.g. turnout etc)
elec.outcomes <- read.csv("/home/readejj/Dropbox/Research/Big Data/twitter elections/election_outcomes.csv",
                          stringsAsFactors=F)

plot(elec.monthly$date,elec.monthly$Con,type="l",col="blue",
     main="Polling of Major Parties since 1943",
     ylab="Polled Vote Share", xlab="Date",
     ylim=range(c(elec.monthly$Con,elec.monthly$Lab,elec.monthly$LD),na.rm=T))
for(i in 1:NROW(elec.outcomes)) {
  abline(v = as.Date(elec.outcomes$date[i],"%d %B %Y"),lty=3)
}
lines(elec.results$f.date,elec.results$Con,type="p",pch=4,col="darkblue")
lines(elec.monthly$date,elec.monthly$Lab,type="l",col="red")
lines(elec.results$f.date,elec.results$Lab,type="p",pch=4,col="darkred")
lines(elec.monthly$date,elec.monthly$LD,type="l",col="yellow")
lines(elec.results$f.date,elec.results$LD,type="p",pch=4,col="orange")

Chapter 6: Time Series Decomposition

6.1 Time Series Components

Example Series

#par(mfrow=c(2,2))
#find UK sales
tourists_AU <- Quandl("EUROSTAT/TOUR_OCC_NIM_1")
plot(tourists_AU$Date,tourists_AU$Value,type="l",main="Tourists in Austria",ylab="Number of Tourists",xlab="Date")

tourists_US <- Quandl("BTS/AIRPASS")
plot(tourists_US$Date,tourists_US$Total,type="l",main="Air Passengers in the US",ylab="Number of Passengers",xlab="Date")

vehicle_sales <- Quandl("FRED/TOTALNSA")
plot(vehicle_sales$Date,vehicle_sales$Value,type="l",main="US Vehicle Sales",ylab="Number of Vehicle Sales",xlab="Date")

ftse$Close.1 <- c(NA,ftse$Close[-NROW(ftse)])
ftse$d.returns <- ftse$Close-ftse$Close.1
plot(ftse$Date,ftse$d.returns,main="FTSE Returns Since 1984",ylab="Daily Difference",xlab="Date",type="l")

#get exchange rate for trends (plus irregular)
gbpusd <- Quandl("BNP/USDGBP")
plot(gbpusd$Date,1/gbpusd$"USD/GBP",type="l",main="US Dollar Exchange Rate",ylab="Dollars per Pound",xlab="Date")

#unemployment for cyclical (plus seasonal and trends)
UKcc <- Quandl("UKONS/LMS_BCJB_M")
UKcc.eng.t <- ts(UKcc$Value[order(UKcc$Year)],start=c(1983,6),frequency=12)
plot(UKcc.eng.t,ylab="Claimants",xlab="Date",main="Claimant Count for England")

6.1 Time Series Components

Examples: Monthly Bike Hires

bike.data <- read.csv("bike_hires_130115.csv",stringsAsFactors=F)
bike.data$Number.of.Bicycle.Hires <- as.numeric(gsub(",","",bike.data$Number.of.Bicycle.Hires))
bike.data$Day <- as.Date(bike.data$Day,"%d/%m/%Y")
plot(bike.data$Day,bike.data$Number.of.Bicycle.Hires,main="Daily Bike Hires",ylab="Number of hires",xlab="Date",type="l")

bike.m <- aggregate(bike.data$Number.of.Bicycle.Hires,by=list(format(bike.data$Day,"%Y-%m")),FUN=sum)
bike.m.t <- ts(bike.m$x,start=c(2010,7),freq=12)
plot(bike.m.t,main="Monthly bike hires",ylab="Number of hires")

bike.m.t.stl <- stl(bike.m.t,t.window=12,s.window="periodic")
plot(bike.m.t.stl)

6.1 Seasonally Adjusted Data

UKcc.eng.t.stl <- stl(UKcc.eng.t,t.window=12,s.window="periodic", robust=TRUE)
plot(UKcc.eng.t,main="UK Claimant Count",ylab="Hundreds of thousands")
lines(seasadj(UKcc.eng.t.stl),col="red",ylab="Seasonally adjusted")
legend("topright",lty=1,col=c(1,2),legend=c("Original","Seasonally Adjusted"),bty="n")

vehicle_sales.t <- ts(vehicle_sales$Value[order(vehicle_sales$Date)],start=c(1976,1),freq=12)
vehicle_sales.t.stl <- stl(vehicle_sales.t,t.window=12,s.window="periodic", robust=TRUE)
plot(vehicle_sales.t,ylab="Seasonally adjusted",main="Vehicle Sales")
lines(seasadj(vehicle_sales.t.stl),col="red")
legend("topleft",lty=1,col=c(1,2),legend=c("Original","Seasonally Adjusted"),bty="n")

tourists_US.t <- ts(tourists_US$Total[order(tourists_US$Date)],start=c(2000,1),freq=12)
tourists_US.t.stl <- stl(tourists_US.t,t.window=12,s.window="periodic", robust=TRUE)
plot(tourists_US.t,ylab="Seasonally adjusted",main="US Air Passengers")
lines(seasadj(tourists_US.t.stl),col="red")
legend("bottomright",lty=1,col=c(1,2),legend=c("Original","Seasonally Adjusted"),bty="n")

6.2 Moving Averages

Moving Averages

vehicle_sales.t.ma <- ma(vehicle_sales.t, order=5)
vehicle_sales.t.ma50 <- ma(vehicle_sales.t, order=50)
vehicle_sales.t.ma100 <- ma(vehicle_sales.t, order=100)
plot(vehicle_sales.t,type="l",ylab="Number of Vehicles Sold",main="Moving Averages of Vehicles Sold")
lines(vehicle_sales.t.ma,col=2,lwd=3)
lines(vehicle_sales.t.ma50,col=3,lwd=3)
lines(vehicle_sales.t.ma100,col=4,lwd=3)
legend("topright",lty=1,col=c(1,2,3,4),legend=c("Actual","5-MA","50-MA","100-MA"),bty="n")

6.2 Moving Averages of Moving Averages

Cycles…

UKcc.eng.t.ma12x2 <- ma(UKcc.eng.t,order=12,centre=TRUE)
UKcc.eng.t.ma12 <- ma(UKcc.eng.t,order=12,centre=FALSE)
plot(window(UKcc.eng.t,start=2009),ylab="Thousands",xlab="Date",
     main="Claimant Counts in England since 2009; Symmetric and non-symmetric MAs")
lines(UKcc.eng.t.ma12,col=2)
lines(UKcc.eng.t.ma12x2,col=3)
legend("bottom",lty=1,col=c(1,2,3),legend=c("Actual","12-MA","2x12-MA"),bty="n")

tourists_US.t.ma12x2 <- ma(tourists_US.t,order=12,centre=TRUE)
tourists_US.t.ma12 <- ma(tourists_US.t,order=12,centre=FALSE)
plot(tourists_US.t,ylab="Thousands",xlab="Date",main="US Air Passengers")
lines(tourists_US.t.ma12,col=2)
lines(tourists_US.t.ma12x2,col=3)
legend("bottom",lty=1,col=c(1,2,3),legend=c("Actual","12-MA","2x12-MA"),bty="n")

6.2 Weighted Moving Averages

6.3 Classical Decomposition for Seasonality \(m\)

  1. If \(m\) even, compute trend-cycle component using 2x\(m\)-MA to obtain \(\ols{T}_t\).
    • If \(m\) odd, computer trend-cycle component using \(m\)-MA.
  2. Calculate detrended data series \(y_t - \ols{T}_t\).
  3. Estimate seasonal component by averaging detrended values for each period.
    • Monthly: Average for all January, February, , December observations in sample.
    • Adjust to ensure seasonal components sum to zero. Yields \(\ols{S}_t\).
  4. Remainder \(\ols{E}_t\) calculated using \(\ols{S}_t\) and \(\ols{T}_t\): \[ \ols{E}_t = y_t - \ols{T}_t - \ols{S}_t. \]

6.3 Classical Decomposition

bike.m.t.c <- decompose(bike.m.t,type="multiplicative")
plot(bike.m.t.c)

tourists_US.t.c <- decompose(tourists_US.t,type="multiplicative")
plot(tourists_US.t.c)

6.3 Problems with Classical Decomposition

6.4 X-12-ARIMA Decomposition

6.5 STL Decomposition

6.5 STL Decomposition

plot(stl(vehicle_sales.t,t.window=1,s.window=1, robust=TRUE))

plot(stl(vehicle_sales.t,t.window=10,s.window=50, robust=TRUE))

plot(stl(vehicle_sales.t,t.window=50,s.window=10, robust=TRUE))

plot(stl(vehicle_sales.t,t.window=100,s.window=100, robust=TRUE))

6.5 STL Decomposition

plot(stl(bike.m.t,t.window=25,s.window=25, robust=TRUE))

6.6 Forecasting with Decomposition

6.6 Forecasting Using Decomposition

bike.m.t0 <- window(bike.m.t,end=c(2014,6))
bike.m.t1 <- window(bike.m.t,start=c(2014,7))
bike.m.t.stl1 <- stl(bike.m.t0,t.window=25,s.window=25, robust=TRUE)
bike.m.t.stl1.f <- forecast(bike.m.t.stl1, method="naive",h=31)
plot(bike.m.t,main="Forecasting with Decomposition",
     ylab="Number of Bike Hires",xlab="Date")
lines(bike.m.t.stl1.f$mean,col=2)
lines(bike.m.t.stl1$time.series[,2],col=3)
lines(bike.m.t.stl1$time.series[,1]+bike.m.t.stl1$time.series[,2],col=4)
legend("topright",lty=1,col=c(1,2,3,4),
       legend=c("Actual","Naive Seasonal Forecast","Modelled Time Trend","Modelled Seasonal (plus trend)"),
       bty="n")

Example (if we have time)

Forecast Training

res.eng <- read.csv("historical.csv")
matches <- res.eng[res.eng$date=="2015-01-24",c("team1","team2","outcome","fitted")]
matches <- matches[is.na(matches$fitted)==F,]
matches$id <- 1:NROW(matches)
par(mar=c(9,4,4,5)+.1)
plot(matches$id,matches$fitted,xaxt="n",xlab="",ylim=range(0,1),main="Forecasts of Matches Last Saturday",
     ylab="Probability of Outcome and Outcome")
axis(1,at=matches$id,labels=paste(matches$team1,matches$team2,sep=" v "),las=2,cex.axis=0.65)
lines(matches$id,matches$outcome,col=2,type="p")
for(i in 1:NROW(matches)) {
  lines(rep(i,2),c(matches$fitted[i],matches$outcome[i]),type="l",lty=2,col=4)
}

forc.matches <- read.csv("forecasts.csv")
forc.matches <- forc.matches[is.na(forc.matches$outcome)==F,]
forc.matches$id <- 1:NROW(forc.matches)
plot(forc.matches$id,forc.matches$outcome,xaxt="n",xlab="",ylim=range(0,1),main="Forecasts of Matches Tonight",
     ylab="Probability of Outcome")
axis(1,at=forc.matches$id,labels=paste(forc.matches$team1,forc.matches$team2,sep=" v "),las=2,cex.axis=0.65)

Concluding and Tasks Ahead of Thursday