J James Reade
26/01/2015
\[ \newcommand{\N}[1]{\mathsf{N}\left(#1\right)} \newcommand{\E}[1]{\mathsf{E}\left(#1\right)} \newcommand{\V}[1]{\mathsf{Var}\left(#1\right)} \newcommand{\ols}[1]{\widehat{#1}} \newcommand{\cond}[1]{\left|#1\right.} \newcommand{\abs}[1]{\left|#1\right|} \]
dave19 <- read.csv("Dave_190115.csv",stringsAsFactors=F)
dave19$Week <- as.Date(substr(dave19$Week,1,10))
plot(dave19$Week,dave19$david.cameron,
main="Google Search Popularity of David Cameron",
ylab="Search popularity",xlab="Date",type="l",ylim=range(0,20))
dave19.ts <- ts(dave19$david.cameron,start=c(2004,1),freq=52)
tsdisplay(dave19.ts)
dave19.arima <- auto.arima(dave19.ts)
dave19.forc <- forecast(dave19.arima,h=10)
plot(dave19.forc,include=100)
dave19.forc$mean
## Time Series:
## Start = c(2015, 5)
## End = c(2015, 14)
## Frequency = 52
## [1] 6.750417 5.771330 5.345202 5.159738 5.079018 5.043886 5.028596
## [8] 5.021941 5.019045 5.017784
dave27 <- read.csv("Dave_270115.csv",stringsAsFactors=F)
tail(dave27,1)
## Week david.cameron
## 578 2015-01-25 - 2015-01-31 5
ftse <- Quandl("YAHOO/INDEX_FTSE")
ftse <- ftse[order(ftse$Date),]
ftse.0 <- ftse[ftse$Date<as.Date("2015-01-23"),]
ftse.auto <- auto.arima(ftse$Close)
ftse.forc <- forecast(ftse.auto,h=50)
plot(ftse.forc,include=100)
ftse.forc$mean[seq(1,50,5)]
## [1] 6848.440 6848.023 6851.560 6855.116 6858.673 6862.229 6865.786
## [8] 6869.342 6872.899 6876.455
ftse[ftse$Date==as.Date("2015-01-23"),]
## Date Open High Low Close Volume Adjusted Close
## 2 2015-01-23 6796.6 6841.7 6796.6 6832.8 839368300 6832.8
mgage <- read.csv("/home/readejj/Dropbox/Teaching/Reading/ec313/2015/Lecture-5/mortgages_270115.csv",stringsAsFactors=F)
mgage$DateTime <- as.Date(substr(mgage$DateTime,1,8),"%Y%m%d")
mgage$Consensus[mgage$DateTime=="2014-05-01"] <- mgage$Consensus[mgage$DateTime=="2014-05-01"]/1000
plot(mgage$DateTime,mgage$Actual,type="l",main="Mortgage Approvals since 2007",ylab="Number of Approvals",xlab="Date")
lines(mgage$DateTime,mgage$Consensus,type="l",col=2)
legend("topright",lty=1,col=c(1,2),legend=c("Actual","Consensus"))
elec <- data.frame()
sheets <- c("43-45","45-50","50-51","51-55","55-59","59-64","64-66","66-70","70-74",
"74","74-79","79-83","83-87","87-92","92-97","97-01","01-05","05-10","10-")
for( i in 1:NROW(sheets)) {
temp <- read.xlsx("/home/readejj/Dropbox/Research/Misc/LVW Projects/UK Elections/data/Mark-Packs-opinion-polls-spreadsheet.xlsx",sheet=sheets[i])
if(NROW(grep("X",colnames(temp)))>0) {
delete.rows <- grep("X",colnames(temp))
for(j in NROW(delete.rows):1){
temp[delete.rows[j]] <- NULL
}
}
if(i>1){
if(NROW(setdiff(colnames(elec),colnames(temp)))>0){
extra.rows <- setdiff(colnames(elec),colnames(temp))
for(j in 1:NROW(extra.rows)){
temp[[extra.rows[j]]] <- NA
}
}
if(NROW(setdiff(colnames(temp),colnames(elec)))>0){
extra.rows <- setdiff(colnames(temp),colnames(elec))
for(j in 1:NROW(extra.rows)){
elec[[extra.rows[j]]] <- NA
}
}
}
elec <- rbind(elec,temp)
}
#next to tidy the resulting file up
elec$Year <- na.locf(elec$Year)
elec$Month <- na.locf(elec$Month)
elec$f.date <- as.Date(paste("01",tolower(substr(elec$Month,1,3)),elec$Year,sep="/"),
"%d/%b/%Y")
#create results file
elec.results <- elec[elec$Polling=="Result" | elec$Polling=="General election",]
elec.results <- elec.results[is.na(elec.results$Year)==F,c("Year","f.date","Con","Lab",
"LD","SDP","Green","Referendum")]
elec.results <- elec.results[duplicated(elec.results)==F,]
elec.results <- elec.results[elec.results$Year>1,]
#create monthly averages of opinion polls
elec.monthly <- aggregate(elec[,c("Con","Lab","LD")],by=list(elec$f.date),FUN=mean,na.rm=T)
colnames(elec.monthly) <- gsub("Group.1","date",colnames(elec.monthly))
elec.monthly <- elec.monthly[as.numeric(format(elec.monthly$date,"%Y"))>1940,]
#load up file from wikipedia of election information (e.g. turnout etc)
elec.outcomes <- read.csv("/home/readejj/Dropbox/Research/Big Data/twitter elections/election_outcomes.csv",
stringsAsFactors=F)
plot(elec.monthly$date,elec.monthly$Con,type="l",col="blue",
main="Polling of Major Parties since 1943",
ylab="Polled Vote Share", xlab="Date",
ylim=range(c(elec.monthly$Con,elec.monthly$Lab,elec.monthly$LD),na.rm=T))
for(i in 1:NROW(elec.outcomes)) {
abline(v = as.Date(elec.outcomes$date[i],"%d %B %Y"),lty=3)
}
lines(elec.results$f.date,elec.results$Con,type="p",pch=4,col="darkblue")
lines(elec.monthly$date,elec.monthly$Lab,type="l",col="red")
lines(elec.results$f.date,elec.results$Lab,type="p",pch=4,col="darkred")
lines(elec.monthly$date,elec.monthly$LD,type="l",col="yellow")
lines(elec.results$f.date,elec.results$LD,type="p",pch=4,col="orange")
par(mar=c(5,4,4,5)+.1)
plot(elec.monthly$date,elec.monthly$Con+elec.monthly$Lab+elec.monthly$LD,type="l",
main="Combined (Opinion) Polling Share of Major Parties since 1943",
ylab="Polled Vote Share (black line)", xlab="Date")
for(i in 1:NROW(elec.outcomes)) {
abline(v = as.Date(elec.outcomes$date[i],"%d %B %Y"),lty=3)
}
par(new=TRUE)
plot(as.Date(elec.outcomes$date,"%d %B %Y"),elec.outcomes$turnout,type="p",pch=4,
col="red",xaxt="n",yaxt="n",xlab="",ylab="",
xlim=range(elec.monthly$date))
axis(4)
mtext("Turnout (red crosses)",side=4,line=3)
#par(mfrow=c(2,2))
#find UK sales
tourists_AU <- Quandl("EUROSTAT/TOUR_OCC_NIM_1")
plot(tourists_AU$Date,tourists_AU$Value,type="l",main="Tourists in Austria",ylab="Number of Tourists",xlab="Date")
tourists_US <- Quandl("BTS/AIRPASS")
plot(tourists_US$Date,tourists_US$Total,type="l",main="Air Passengers in the US",ylab="Number of Passengers",xlab="Date")
vehicle_sales <- Quandl("FRED/TOTALNSA")
plot(vehicle_sales$Date,vehicle_sales$Value,type="l",main="US Vehicle Sales",ylab="Number of Vehicle Sales",xlab="Date")
ftse$Close.1 <- c(NA,ftse$Close[-NROW(ftse)])
ftse$d.returns <- ftse$Close-ftse$Close.1
plot(ftse$Date,ftse$d.returns,main="FTSE Returns Since 1984",ylab="Daily Difference",xlab="Date",type="l")
#get exchange rate for trends (plus irregular)
gbpusd <- Quandl("BNP/USDGBP")
plot(gbpusd$Date,1/gbpusd$"USD/GBP",type="l",main="US Dollar Exchange Rate",ylab="Dollars per Pound",xlab="Date")
#unemployment for cyclical (plus seasonal and trends)
UKcc <- Quandl("UKONS/LMS_BCJB_M")
UKcc.eng.t <- ts(UKcc$Value[order(UKcc$Year)],start=c(1983,6),frequency=12)
plot(UKcc.eng.t,ylab="Claimants",xlab="Date",main="Claimant Count for England")
bike.data <- read.csv("bike_hires_130115.csv",stringsAsFactors=F)
bike.data$Number.of.Bicycle.Hires <- as.numeric(gsub(",","",bike.data$Number.of.Bicycle.Hires))
bike.data$Day <- as.Date(bike.data$Day,"%d/%m/%Y")
plot(bike.data$Day,bike.data$Number.of.Bicycle.Hires,main="Daily Bike Hires",ylab="Number of hires",xlab="Date",type="l")
bike.m <- aggregate(bike.data$Number.of.Bicycle.Hires,by=list(format(bike.data$Day,"%Y-%m")),FUN=sum)
bike.m.t <- ts(bike.m$x,start=c(2010,7),freq=12)
plot(bike.m.t,main="Monthly bike hires",ylab="Number of hires")
bike.m.t.stl <- stl(bike.m.t,t.window=12,s.window="periodic")
plot(bike.m.t.stl)
UKcc.eng.t.stl <- stl(UKcc.eng.t,t.window=12,s.window="periodic", robust=TRUE)
plot(UKcc.eng.t,main="UK Claimant Count",ylab="Hundreds of thousands")
lines(seasadj(UKcc.eng.t.stl),col="red",ylab="Seasonally adjusted")
legend("topright",lty=1,col=c(1,2),legend=c("Original","Seasonally Adjusted"),bty="n")
vehicle_sales.t <- ts(vehicle_sales$Value[order(vehicle_sales$Date)],start=c(1976,1),freq=12)
vehicle_sales.t.stl <- stl(vehicle_sales.t,t.window=12,s.window="periodic", robust=TRUE)
plot(vehicle_sales.t,ylab="Seasonally adjusted",main="Vehicle Sales")
lines(seasadj(vehicle_sales.t.stl),col="red")
legend("topleft",lty=1,col=c(1,2),legend=c("Original","Seasonally Adjusted"),bty="n")
tourists_US.t <- ts(tourists_US$Total[order(tourists_US$Date)],start=c(2000,1),freq=12)
tourists_US.t.stl <- stl(tourists_US.t,t.window=12,s.window="periodic", robust=TRUE)
plot(tourists_US.t,ylab="Seasonally adjusted",main="US Air Passengers")
lines(seasadj(tourists_US.t.stl),col="red")
legend("bottomright",lty=1,col=c(1,2),legend=c("Original","Seasonally Adjusted"),bty="n")
vehicle_sales.t.ma <- ma(vehicle_sales.t, order=5)
vehicle_sales.t.ma50 <- ma(vehicle_sales.t, order=50)
vehicle_sales.t.ma100 <- ma(vehicle_sales.t, order=100)
plot(vehicle_sales.t,type="l",ylab="Number of Vehicles Sold",main="Moving Averages of Vehicles Sold")
lines(vehicle_sales.t.ma,col=2,lwd=3)
lines(vehicle_sales.t.ma50,col=3,lwd=3)
lines(vehicle_sales.t.ma100,col=4,lwd=3)
legend("topright",lty=1,col=c(1,2,3,4),legend=c("Actual","5-MA","50-MA","100-MA"),bty="n")
UKcc.eng.t.ma12x2 <- ma(UKcc.eng.t,order=12,centre=TRUE)
UKcc.eng.t.ma12 <- ma(UKcc.eng.t,order=12,centre=FALSE)
plot(window(UKcc.eng.t,start=2009),ylab="Thousands",xlab="Date",
main="Claimant Counts in England since 2009; Symmetric and non-symmetric MAs")
lines(UKcc.eng.t.ma12,col=2)
lines(UKcc.eng.t.ma12x2,col=3)
legend("bottom",lty=1,col=c(1,2,3),legend=c("Actual","12-MA","2x12-MA"),bty="n")
tourists_US.t.ma12x2 <- ma(tourists_US.t,order=12,centre=TRUE)
tourists_US.t.ma12 <- ma(tourists_US.t,order=12,centre=FALSE)
plot(tourists_US.t,ylab="Thousands",xlab="Date",main="US Air Passengers")
lines(tourists_US.t.ma12,col=2)
lines(tourists_US.t.ma12x2,col=3)
legend("bottom",lty=1,col=c(1,2,3),legend=c("Actual","12-MA","2x12-MA"),bty="n")
bike.m.t.c <- decompose(bike.m.t,type="multiplicative")
plot(bike.m.t.c)
tourists_US.t.c <- decompose(tourists_US.t,type="multiplicative")
plot(tourists_US.t.c)
x12 R package.plot(stl(vehicle_sales.t,t.window=1,s.window=1, robust=TRUE))
plot(stl(vehicle_sales.t,t.window=10,s.window=50, robust=TRUE))
plot(stl(vehicle_sales.t,t.window=50,s.window=10, robust=TRUE))
plot(stl(vehicle_sales.t,t.window=100,s.window=100, robust=TRUE))
plot(stl(bike.m.t,t.window=25,s.window=25, robust=TRUE))
bike.m.t0 <- window(bike.m.t,end=c(2014,6))
bike.m.t1 <- window(bike.m.t,start=c(2014,7))
bike.m.t.stl1 <- stl(bike.m.t0,t.window=25,s.window=25, robust=TRUE)
bike.m.t.stl1.f <- forecast(bike.m.t.stl1, method="naive",h=31)
plot(bike.m.t,main="Forecasting with Decomposition",
ylab="Number of Bike Hires",xlab="Date")
lines(bike.m.t.stl1.f$mean,col=2)
lines(bike.m.t.stl1$time.series[,2],col=3)
lines(bike.m.t.stl1$time.series[,1]+bike.m.t.stl1$time.series[,2],col=4)
legend("topright",lty=1,col=c(1,2,3,4),
legend=c("Actual","Naive Seasonal Forecast","Modelled Time Trend","Modelled Seasonal (plus trend)"),
bty="n")
res.eng <- read.csv("historical.csv")
matches <- res.eng[res.eng$date=="2015-01-24",c("team1","team2","outcome","fitted")]
matches <- matches[is.na(matches$fitted)==F,]
matches$id <- 1:NROW(matches)
par(mar=c(9,4,4,5)+.1)
plot(matches$id,matches$fitted,xaxt="n",xlab="",ylim=range(0,1),main="Forecasts of Matches Last Saturday",
ylab="Probability of Outcome and Outcome")
axis(1,at=matches$id,labels=paste(matches$team1,matches$team2,sep=" v "),las=2,cex.axis=0.65)
lines(matches$id,matches$outcome,col=2,type="p")
for(i in 1:NROW(matches)) {
lines(rep(i,2),c(matches$fitted[i],matches$outcome[i]),type="l",lty=2,col=4)
}
forc.matches <- read.csv("forecasts.csv")
forc.matches <- forc.matches[is.na(forc.matches$outcome)==F,]
forc.matches$id <- 1:NROW(forc.matches)
plot(forc.matches$id,forc.matches$outcome,xaxt="n",xlab="",ylim=range(0,1),main="Forecasts of Matches Tonight",
ylab="Probability of Outcome")
axis(1,at=forc.matches$id,labels=paste(forc.matches$team1,forc.matches$team2,sep=" v "),las=2,cex.axis=0.65)