Igor — Apr 29, 2015, 8:50 PM
rm(list=ls()) # clears out memory
setwd("~/projects")
refresh_data <- F
# Predictors <- c("OIL","OIL2","EURUSD","NI","AL","CU","AU");
# Predictors <- c("OIL","OIL2","EURUSD","METALS");
## Predictors <- c("OIL","REPO","EURUSD","GOOG","RES");
# Predictors <- c("OIL","OIL2","EURUSD","RES", "IMPORT","dOIL");
# Predictors <- c("OIL","dOIL","EURUSD","GOOG","RES","IMPORT","REPO");
Predictors <- c("OIL","dOIL","EURUSD","GOOG","RES","REPO","METALS");
# Predictors <- c("OIL","OIL2")
Response <- "RUBUSD"
start_date="2014-06-01"
final_date="2015-04-14"
#final_date="2015-04-01"
#final_date="2015-04-20"
plot_xaxis="OIL"
BCP=-1.5 # Box-Cox transformation parameter
outliers <- c(135,136,137,138,139,140)
library(moments)
usd.file="./QUANDL_USDRUB.csv"
eur.file="./QUANDL_EURUSD.csv"
oil.file="./QUANDL_OIL.csv"
ni.file="./QUANDL_NI.csv"
al.file="./QUANDL_AL.csv"
cu.file="./QUANDL_CU.csv"
au.file="./QUANDL_AU.csv"
U1="https://www.quandl.com/api/v1/datasets/"
U2=".csv?trim_start="
if(refresh_data)
{
url_usd=paste(U1,"CURRFX/USDRUB",U2,start_date,sep="")
download.file(url_usd, usd.file, "wget")
url_eur=paste(U1,"CURRFX/EURUSD",U2,start_date,sep="")
download.file(url_eur, eur.file, "wget")
url_oil=paste(U1,"OPEC/ORB",U2,start_date,sep="")
download.file(url_oil, oil.file, "wget")
url_al=paste(U1,"OFDP/ALUMINIUM_21",U2,start_date,sep="")
download.file(url_al, al.file, "wget")
url_ni=paste(U1,"OFDP/NICKEL_41",U2,start_date,sep="")
download.file(url_ni, ni.file, "wget")
url_cu=paste(U1,"OFDP/COPPER_6",U2,start_date,sep="")
download.file(url_cu, cu.file, "wget")
url_au=paste(U1,"WSJ/AU_EIB",U2,start_date,sep="")
download.file(url_au, au.file, "wget")
}
## forex
usd_ <- read.csv(usd.file, header = TRUE, sep = ",")
eur_ <- read.csv(eur.file, header = TRUE, sep = ",")
## energy
oil_ <- read.csv(oil.file, header = TRUE, sep = ",")
## metals
al_ <- read.csv(al.file, header = TRUE, sep = ",")
ni_ <- read.csv(ni.file, header = TRUE, sep = ",")
cu_ <- read.csv(cu.file, header = TRUE, sep = ",")
au_ <- read.csv(au.file, header = TRUE, sep = ",")
## ===================================================
NN=length(usd_$Date)
usd.data <- data.frame(DATE=as.Date(usd_$Date),
USDRUB=usd_$Rate,
RUBUSD=1/usd_$Rate)
dusd.data <-
data.frame(DATE=as.Date(usd.data$DATE[-NN]),
dUSDRUB=diff(usd.data$USDRUB)/usd.data$USDRUB[-NN],
dRUBUSD=diff(usd.data$RUBUSD)/usd.data$RUBUSD[-NN]);
usd.data <- merge(usd.data, dusd.data, by="DATE");
## ===================================================
NN=length(eur_$Date)
eur.data <- data.frame(DATE=as.Date(eur_$Date),
EURUSD=eur_$Rate,
USDEUR=1/eur_$Rate);
deur.data <-
data.frame(DATE=as.Date(eur.data$DATE[-NN]),
dUSDEUR=diff(eur.data$USDEUR)/eur.data$USDEUR[-NN],
dEURUSD=diff(eur.data$EURUSD)/eur.data$EURUSD[-NN]);
eur.data <- merge(eur.data, deur.data, by="DATE");
## ===================================================
NN=length(oil_$Date)
oil.data <- data.frame(DATE=as.Date(oil_$Date[-NN]),
OIL=oil_$Value[-NN],
dOIL=diff(oil_$Value)/oil_$Value[-NN])
al.data <- data.frame(DATE=as.Date(al_$Date), AL=al_$Mid)
ni.data <- data.frame(DATE=as.Date(ni_$Date), NI=ni_$Mid)
cu.data <- data.frame(DATE=as.Date(cu_$Date), CU=cu_$Mid)
au.data <- data.frame(DATE=as.Date(au_$Date), AU=au_$Value)
merged.data <- merge(usd.data, eur.data, by="DATE")
merged.data <- merge(merged.data, oil.data, by="DATE")
merged.data <- merge(merged.data, al.data, by="DATE")
merged.data <- merge(merged.data, ni.data, by="DATE")
merged.data <- merge(merged.data, cu.data, by="DATE")
merged.data <- merge(merged.data, au.data, by="DATE")
##===================================================================
## Plot Crude Oil Prices (sort "Brent")
par(mfrow=c(1,1))
plot(oil.data$DATE, oil.data$OIL,
main="Oil Prices",
type="b", pch=16, lwd=2,
xlab="Dates",ylab="Oil Prices ($)")
points(oil.data$DATE,oil.data$OIL,
col="red",lwd=2)
grid()
##===================================================================
## Plot Exchange Rate data (USDRUB)
par(mfrow=c(1,1))
plot(usd.data$DATE, usd.data$USDRUB,
main="Exchange Rate USDRUB",
type="b", pch=16, lwd=2,
xlab="Dates",ylab="USDRUB")
points(usd.data$DATE,usd.data$USDRUB,
col="red",lwd=2)
grid()
##===================================================================
## Plot Exchange Rate data (RUBUSD)
par(mfrow=c(1,1))
plot(usd.data$DATE, usd.data$RUBUSD,
main="Exchange Rate RUBUSD",
type="b", pch=16, lwd=2,
xlab="Dates",ylab="RUBUSD")
points(usd.data$DATE,usd.data$RUBUSD,
col="red",lwd=2)
grid()
##===================================================================
## Download and plot Reserves of Russian Central Bank
reserves.file <- "ru_cbank_reserves.csv"
reserves.csv <- read.csv(reserves.file, header = TRUE, sep = ",")
reserves_ <- data.frame(DATE=as.Date(reserves.csv$Date,
format="%d/%m/%Y"),
Reserves=reserves.csv$Amount);
reserves.interp <- spline(reserves_$DATE,reserves_$Reserves,
xout=usd.data$DATE,method="natural")$y
NN=length(usd.data$DATE)
reserves.data <- data.frame(DATE=usd.data$DATE[-NN],
RES=reserves.interp[-NN],
dRES=diff(reserves.interp)/reserves.interp[-NN])
par(mfrow=c(1,1))
plot(usd.data$DATE,reserves.interp,
main="International Reserves of Russia",
type="b", pch=16, lwd=2,
xlab="Dates",ylab="Reserves (billions USD)")
grid()
points(reserves_$DATE,reserves_$Reserves,col="red",lwd=4)
merged.data <- merge(merged.data, reserves.data, by="DATE")
##===================================================================
imports.file <- "russia_imports.csv"
imports.csv <- read.csv(imports.file, header = TRUE, sep = ",")
imports_ <- data.frame(DATE=as.Date(imports.csv$Date,
format="%d/%m/%Y"),
Imports=imports.csv$Imports);
imports.interp <- spline(imports_$DATE,imports_$Imports,
xout=usd.data$DATE,method="natural")$y
NN=length(usd.data$DATE)
imports.data <- data.frame(DATE=usd.data$DATE[-NN],
IMPORT=imports.interp[-NN],
dIMPORT=diff(imports.interp)/imports.interp[-NN])
par(mfrow=c(1,1))
plot(usd.data$DATE,imports.interp,
main="Imports of Russia",
type="b", pch=16, lwd=2,
xlab="Dates",ylab="Imports (millions USD)")
grid()
points(imports_$DATE,imports_$Imports,col="red",lwd=4)
merged.data <- merge(merged.data, imports.data, by="DATE")
##===================================================================
## http://www.cbr.ru/eng/hd_base/Default.aspx?Prtid=micex_doc
tvolume.file <- "ru_cbank_usdrub_tvolume.csv"
tvolume.csv <- read.csv(tvolume.file, header = TRUE, sep = ",")
tvolume_ <- data.frame(DATE=as.Date(tvolume.csv$Date,
format="%d/%m/%Y"),
Tvolume=tvolume.csv$TVol);
tvolume.interp <- approx(tvolume_$DATE,tvolume_$Tvolume,
xout=usd.data$DATE, rule=2,
method="constant")$y
NN=length(usd.data$DATE)
tvolume.data <- data.frame(DATE=usd.data$DATE[-NN],
TVOL=tvolume.interp[-NN],
dTVOL=diff(tvolume.interp)/tvolume.interp[-NN])
par(mfrow=c(1,1))
plot(usd.data$DATE,tvolume.interp,
main="Trade Volume",
type="h", pch=16, lwd=2,
xlab="Dates",ylab="Trade Volume")
grid()
points(tvolume_$DATE,tvolume_$Tvolume,col="red",lwd=4)
merged.data <- merge(merged.data, tvolume.data, by="DATE")
## ##===================================================================
## http://www.cbr.ru/Eng/hd_base/Default.aspx?Prtid=swapmonthtotal
fxswap.file <- "ru_cbank_fxswaps_volume.csv"
fxswap.csv <- read.csv(fxswap.file, header = TRUE, sep = ",")
fxswap_ <- data.frame(DATE=as.Date(fxswap.csv$Date,
format="%d/%m/%Y"),
Fxswap=fxswap.csv$SWAP.RUB);
fxswap.interp <- approx(fxswap_$DATE,fxswap_$Fxswap,
xout=usd.data$DATE, rule=2,
method="constant")$y
fxswap.data <- data.frame(DATE=usd.data$DATE,
FXSWAP=fxswap.interp)
par(mfrow=c(1,1))
plot(usd.data$DATE,fxswap.interp,
main="FX Swap Volume",
type="h", pch=16, lwd=2,
xlab="Dates",ylab="FX Volume")
grid()
points(fxswap_$DATE,fxswap_$Fxswap,col="red",lwd=4)
merged.data <- merge(merged.data, fxswap.data, by="DATE")
## ##===================================================================
google.ru.file <- "google_trend_search_rate_russia.csv"
google.ru.csv <- read.csv(google.ru.file, header = TRUE, sep = ",")
google.ru_ <- data.frame(DATE=as.Date(google.ru.csv$Date,
format="%Y-%m-%d"),
Google.Ru=google.ru.csv$Searches);
google.ru.interp <- spline(google.ru_$DATE,google.ru_$Google.Ru,
xout=usd.data$DATE,method="natural")$y
NN=length(usd.data$DATE)
google.ru.data <- data.frame(DATE=usd.data$DATE[-NN],
GOOG=google.ru.interp[-NN],
dGOOG=diff(google.ru.interp)/google.ru.interp[-NN])
par(mfrow=c(1,1))
plot(usd.data$DATE,google.ru.interp,
main="Google Search Trend for 'Russia'",
type="b", pch=16, lwd=2,
xlab="Dates",ylab="Num. of Searches")
grid()
points(google.ru_$DATE,google.ru_$Google.Ru,col="red",lwd=4)
merged.data <- merge(merged.data, google.ru.data, by="DATE")
##===================================================================
## data from http://www.cbr.ru/eng/hd_base/Default.aspx?Prtid=repo
repo.rate.file <- "ru_cbank_repo_rates_7d.csv"
repo.rate.csv <- read.csv(repo.rate.file, header = TRUE, sep = ",")
repo.rate_ <- data.frame(DATE=as.Date(repo.rate.csv$Date,
format="%d/%m/%Y"),
Repo.Rate=repo.rate.csv$RepoRate);
repo.rate.interp <- approx(repo.rate_$DATE,repo.rate_$Repo.Rate,
xout=usd.data$DATE, rule=2,
method="constant")$y
NN=length(usd.data$DATE)
repo.rate.data <- data.frame(DATE=usd.data$DATE,
REPO=repo.rate.interp)
par(mfrow=c(1,1))
plot(usd.data$DATE,repo.rate.interp,
main="Repo Rates",
type="b", pch=16, lwd=2,
xlab="Dates",ylab="Repo Rate")
grid()
points(repo.rate_$DATE,repo.rate_$Repo.Rate,col="red",lwd=4)
merged.data <- merge(merged.data, repo.rate.data, by="DATE")
##===================================================================
## data from http://www.levada.ru/indeksy
opinion.file <- "opinion_about_russia.csv"
opinion.csv <- read.csv(opinion.file, header = TRUE, sep = ",")
opinion_ <- data.frame(DATE=as.Date(opinion.csv$Date,
format="%d/%m/%Y"),
Opinion=opinion.csv$Right);
opinion.interp <- spline(opinion_$DATE,opinion_$Opinion,
xout=usd.data$DATE,method="natural")$y
NN=length(usd.data$DATE)
opinion.data <- data.frame(DATE=usd.data$DATE[-NN],
OPIN=opinion.interp[-NN],
dOPIN=diff(opinion.interp)/opinion.interp[-NN])
par(mfrow=c(1,1))
plot(usd.data$DATE,opinion.interp,
main="Opinion of Russians of situation in the country",
type="b", pch=16, lwd=2,
xlab="Dates",ylab="Percentage of optimists")
grid()
points(opinion_$DATE,opinion_$Opinion,col="red",lwd=4)
merged.data <- merge(merged.data, opinion.data, by="DATE")
##===================================================================
##===================================================================
X = merged.data[-outliers,]
##===================================================================
FOO <- function(U){1/U}
BAR <- function(V){1/V} # inverse of FOO
X = data.frame(X, "Y"= (FOO(X[[Response]])^BCP-1)/BCP,
"OIL2" = X$OIL^2,
"METALS" = X$AL/mean(X$AL)+
X$CU/mean(X$CU)+
X$NI/mean(X$NI)+
X$AU/mean(X$AU))
MX <- apply(X[,Predictors],2,mean);
Ntot=dim(X)[1]
Xpast = subset(X,X$DATE < as.Date(final_date))
Npast=dim(Xpast)[1]
##===================================================================
## Plot Prices Of Metals
par(mfrow=c(1,1))
plot(X$DATE, X$METALS,
main="Prices of metals",
type="b", pch=16, lwd=2,
xlab="Dates",ylab="METALS")
points(X$DATE, X$METALS,
col="red",lwd=2)
grid()
##===================================================================
### Factor analysis:
par(mfrow=c(1,1))
plot(X[,Predictors], col="darkred", main="Predictor Variables")
Nfactors=2
fmodel <- factanal(X[,Predictors], Nfactors, rotation="varimax");
cat("Factor model"); print(fmodel)
Factor model
Call:
factanal(x = X[, Predictors], factors = Nfactors, rotation = "varimax")
Uniquenesses:
OIL dOIL EURUSD GOOG RES REPO METALS
0.060 0.969 0.020 0.519 0.009 0.074 0.074
Loadings:
Factor1 Factor2
OIL 0.948 0.205
dOIL 0.174
EURUSD 0.867 0.479
GOOG -0.691
RES 0.918 0.386
REPO -0.959
METALS 0.875 0.401
Factor1 Factor2
SS loadings 4.654 0.621
Proportion Var 0.665 0.089
Cumulative Var 0.665 0.754
Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 31.73 on 8 degrees of freedom.
The p-value is 0.000104
FLOAD <- t(fmodel$loadings[,1:Nfactors])
barplot(FLOAD,beside=T,col=c("#FF5500","#55AA55"),
main="Loadings of the Factors",xlab="Component",ylab="A(i)")
legend("bottomright",rownames(FLOAD),
fill=c("#FF5500","#55AA55"))
##===================================================================
#### Prinicipal component analysis
PCA <- prcomp(X[,Predictors])
print(summary(PCA))
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6
Standard deviation 401.3944 38.70358 6.38254 1.33004 0.07477 0.01906
Proportion of Variance 0.9905 0.00921 0.00025 0.00001 0.00000 0.00000
Cumulative Proportion 0.9905 0.99974 0.99999 1.00000 1.00000 1.00000
PC7
Standard deviation 0.01649
Proportion of Variance 0.00000
Cumulative Proportion 1.00000
plot(PCA, main="Principal Components")
## plot(PCA$rotation[,1], type="h", lwd=4, col="red",
## ylim=c(-1,1),
## main="Principal components loadings")
## lines(PCA$rotation[,2], type="h", lwd=4, col="green")
## lines(PCA$rotation[,3], type="h", lwd=4, col="blue")
PCA.DF <- t(PCA$rotation[,1:2])
barplot(PCA.DF,beside=T,col=c("#FF5500","#55AA55"),
main="Loadings of Principal Components",
xlab="Component",ylab="A(i)")
abline(0,0,lwd=0.5)
legend("topright",rownames(PCA.DF),
fill=c("#FF5500","#55AA55"))
##===================================================================
### Linear Regression:
Formula <- formula(paste("Y ~ ", paste(Predictors, collapse=" + ")))
LM1=lm(Formula, Xpast)
print(summary(LM1)) #summary of results
Call:
lm(formula = Formula, data = Xpast)
Residuals:
Min 1Q Median 3Q Max
-1.803e-04 -4.415e-05 -4.322e-06 5.438e-05 1.659e-04
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.669e-01 1.936e-04 3445.529 < 2e-16 ***
OIL -2.920e-05 8.153e-07 -35.808 < 2e-16 ***
dOIL -8.840e-04 2.810e-04 -3.146 0.00192 **
EURUSD 8.249e-04 2.591e-04 3.184 0.00170 **
GOOG 2.858e-07 1.765e-08 16.198 < 2e-16 ***
RES -5.543e-06 7.726e-07 -7.175 1.52e-11 ***
REPO -2.263e-05 3.728e-06 -6.070 6.64e-09 ***
METALS 2.776e-04 6.440e-05 4.311 2.58e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 6.797e-05 on 193 degrees of freedom
Multiple R-squared: 0.9928, Adjusted R-squared: 0.9925
F-statistic: 3785 on 7 and 193 DF, p-value: < 2.2e-16
## create plots
par(mfrow=c(1,1))
library(MASS)
if(BCP != 1)
{
boxcox(LM1, lambda=seq(-2.5, 4.5, 0.1))
}
par(mfrow=c(1,1))
hist(LM1$residuals,nclass=24, freq=F, col="lightgreen",
main="Histogram of Residuals", xlab="Residuals")
DENS=density(LM1$residuals)
lines(DENS$x,DENS$y,lwd=5, col="darkblue")
par(mfrow=c(1,1))
plot(LM1, which=c(4))
par(mfrow=c(2,3)) #drawing in 2 by 2 format
plot(LM1, which=c(1,2,3,4,5,6))
par(mfrow=c(1,1))
tfirst = X$DATE[1]
tlast = X$DATE[Ntot]
xmin <- apply(X,2,min)
xmax <- apply(X,2,max)
MATX <- cbind(1,X[,Predictors]);
YHAT <- LM1$coef %*% t(MATX);
XOUT <- MATX[,plot_xaxis];
YOUT <- BAR((1+BCP*YHAT)^(1/BCP));
dR=max(X[[Response]])-min(X[[Response]]);
ymin=min(X[[Response]])-0.1*dR;
ymax=max(X[[Response]])+0.1*dR;
plot(X[[plot_xaxis]], X[[Response]],
type="p",col="red",lwd=4,
xlab=plot_xaxis, ylab=Response,
ylim=c(ymin,ymax),
main=sprintf("%s from %s to %s",Response,tfirst,tlast))
grid();
points(XOUT,YOUT, lwd=4, col="magenta")
points(X[Ntot,plot_xaxis],X[Ntot,Response], type = "p", col="darkred", lwd=6, pch = 21)
text(X[Ntot,plot_xaxis],X[Ntot,Response], "â—€ H", adj=-0.2, col="black")
points(XOUT[Ntot],YOUT[Ntot], type = "p", col="darkmagenta", lwd=6, pch = 21)
text(XOUT[Ntot],YOUT[Ntot], "â—€ M",adj=-0.2, col="black")
legend("topleft",inset = 0.05, c("History ","Model "),
pch=c(1,1), lty=c(0,0), lwd=c(4,4), col=c("red","magenta"))
cat("mean=", mean(LM1$residual),"\n")
mean= -6.172331e-22
cat("st.dev.=", sd(LM1$residual),"\n")
st.dev.= 6.677037e-05
cat("skewness=", skewness(LM1$residual),"\n")
skewness= -0.0008588909
cat("kurtosis=", kurtosis(LM1$residual),"\n")
kurtosis= 2.544704
print(shapiro.test(LM1$residual))
Shapiro-Wilk normality test
data: LM1$residual
W = 0.9931, p-value = 0.4657
library(nortest)
print(ad.test(LM1$residual))
Anderson-Darling normality test
data: LM1$residual
A = 0.4186, p-value = 0.325
## Plot first set of data and draw its axis
DATES=as.Date(X[["DATE"]])
par(mar = c(5, 4, 4, 4) + 0.3)
plot(DATES,X[[Response]],
main=sprintf("Time Series of %s and %s",
Response,plot_xaxis),
axes=FALSE, type="l", lwd=5, col="red",
xlab="", ylab="")
axis(2, col="red", col.axis="red", las=1)
mtext(Response,side=2,col="red",line=3)
lines(DATES,YOUT, lwd=5, lty=1, col="magenta")
## Allow a second plot on the same graph
par(new=TRUE)
## Plot the second plot and put axis scale on right
plot(DATES,X[[plot_xaxis]],
axes=FALSE, type="l",lwd=3,lty=1,col="blue", xlab="", ylab="")
axis(4, col="blue", col.axis="blue", las=1)
mtext(plot_xaxis,side=4,line=2.5, col="blue")
box();
## Draw the time axis
RNG=seq(DATES[1],DATES[Ntot],by="month")
axis.Date(1,at=RNG,format="%b")
mtext("Date",side=1,col="black",line=3)
abline(v=as.Date(final_date),col="grey2",lty=2,lwd=3)
grid(NULL,NA)
legend("bottomleft",inset = 0.05, c(plot_xaxis,Response,"Model"),
lty=c(1,1,1), lwd=c(3,4,5), col=c("blue","red","magenta"))
#===================================================================
YH_t <- X[[Response]];
YR_t <- YOUT;
EPS_t <- X[[Response]]-YOUT;
EPS_s <- EPS_t[1:Npast];
EPS_p <- EPS_t[(Npast+1):Ntot];
rel_error <- EPS_t/YH_t;
plot(DATES,100*rel_error,type="h",
lwd=2,pch=20,ylab="Relative error (%)")
par(mfrow=c(1,2))
acf(array(EPS_s),
main="ACF for regression error",
lwd=3,col="blue",
xlab="index",ylab="ACF coeff. for error")
pacf(array(EPS_s),
main="PACF for regression error",
lwd=3,col="blue",
xlab="index",ylab="PACF coeff. for error")
par(mfrow=c(1,1))
ar.eps <- ar(array(EPS_s),order.max=20);
print(ad.test(ar.eps$resid))
Anderson-Darling normality test
data: ar.eps$resid
A = 2.7608, p-value = 5.656e-07
cat("=====================================\n");
=====================================
cat("Box-Ljung test for residuals of AR fit")
Box-Ljung test for residuals of AR fit
print(Box.test(ar.eps$resid, fitdf=3,lag=10, type="Ljung"))
Box-Ljung test
data: ar.eps$resid
X-squared = 7.9773, df = 7, p-value = 0.3346
AR_FIT <- array(ar.eps$ar);
plot(AR_FIT,
main="AR model fit for linear regression errors",
type="h",lwd=8, col="blue",
xlab="index",ylab="AR model coeff. for error")
abline(0,0)
grid();
## EPS_AR <- AR_FIT[1:2] %*% t(cbind(EPS_t[2:Ntot],EPS_t[1:(Ntot-1)]));
## EPS_AR <- c(EPS_t[1], EPS_t[2], EPS_AR[1:(length(EPS_AR)-1)]);
## AR2_RES <- EPS_t-EPS_AR
hist(ar.eps$resid,nclass=24, freq=F, col="grey",
main="histogram of residuals in AR model for regression errors")
DENS=density(ar.eps$resid[!is.na(ar.eps$resid)])
lines(DENS$x,DENS$y,lwd=5, col="darkblue")
plot(DATES,EPS_t, type="b",lwd=2, xlab="t", ylab="x(t)")
## ===================================================
#### prediction with AR model (NOT WORKING)
## predicted.eps <- predict(ar.eps, n.ahead=(Ntot-Npast))
## eps_mu <- predicted.eps$pred
## pred_dates <-DATES[(Npast+1):Ntot]
## lines(pred_dates,100*eps_mu,col="darkgreen",lwd=4)
## ===================================================
## ===================================================
#### HOLT'S LINEAR METHOD
library(forecast)
Loading required package: parallel
Loading required package: tseries
Loading required package: fracdiff
Loading required package: zoo
Attaching package: 'zoo'
The following object(s) are masked from 'package:base':
as.Date, as.Date.numeric
Loading required package: Rcpp
Loading required package: RcppArmadillo
Loading required package: colorspace
Loading required package: nnet
Loading required package: caret
Loading required package: lattice
Loading required package: reshape2
Loading required package: plyr
Loading required package: cluster
Loading required package: foreach
This is forecast 4.00
past_dates <- DATES[1:Npast]
data <- EPS_s;
n_predict=Ntot-Npast
fit1 <- holt(data, alpha=NULL, beta=NULL,
initial="optimal", h=n_predict, damped=FALSE)
fit3 <- holt(data, alpha=NULL, beta=NULL,
initial="optimal", h=n_predict, damped=TRUE)
pred_dates <-DATES[(Npast+1):Ntot]
## lines(past_dates,data, type="l", col="darkgreen", lwd=3)
lines(past_dates,fitted(fit1), col="blue", lwd=2)
lines(past_dates,fitted(fit3), col="red", lwd=2)
lines(pred_dates,fit1$mean, col="blue", lwd=2)
lines(pred_dates,fit3$mean, col="red",lwd=2)
abline(v=as.Date(final_date),col="grey2",lty=2,lwd=3)
legend("topleft", lty=1, col=c("black","blue","red"),
c("Historical Data",
"Holt's linear trend",
"Holt's damped trend"))
## ===================================================
## ===================================================
#### Loop over different arima models and try to fit
## ===================================================
data <- EPS_s;
pmax <- 3
imax <- 1
qmax <- 4
cat("=====================================\n");
=====================================
cat("Choosing the best ARIMA(p,i,q) model ...\n");
Choosing the best ARIMA(p,i,q) model ...
cat("=====================================\n");
=====================================
best.order<-c(0,0,0)
best.aic<-Inf
for (p in 0:pmax) for (i in 0:imax) for (q in 0:qmax)
{
fit.arima <-arima(data, order=c(p,i,q));
fit.aic<-fit.arima$aic;
if (fit.aic < best.aic)
{
best.order <- c(p,i,q);
best.arima <- fit.arima;
best.aic <-fit.aic;
}
}
cat("The best model is ARIMA(",
best.order[1],",",
best.order[2],",",
best.order[3],")\n");
The best model is ARIMA( 1 , 0 , 2 )
cat("AIC for this model is",best.aic,"\n");
AIC for this model is -2816.202
cat("=====================================\n");
=====================================
hist(residuals(best.arima), nclass=60, freq=F, col="violet",
main="Histogram of residuals for ARIMA(p,i,q) model")
dens.eps=density(residuals(best.arima))
lines(dens.eps$x,dens.eps$y,lwd=5, col="darkblue")
print(ad.test(residuals(best.arima)))
Anderson-Darling normality test
data: residuals(best.arima)
A = 2.2879, p-value = 8.137e-06
cat("=====================================\n");
=====================================
cat("Box-Ljung test for residuals of ARIMA fit")
Box-Ljung test for residuals of ARIMA fit
print(Box.test(residuals(best.arima),
fitdf=best.order[1]+best.order[3],
lag=10, type="Ljung"))
Box-Ljung test
data: residuals(best.arima)
X-squared = 1.5043, df = 7, p-value = 0.9822
## arima prediction
arima.pred <- predict(best.arima , n.ahead=n_predict)
## arima simulation
arima.model <- as.list(coef(best.arima))
mu=mean(as.vector(EPS_s))
sigma=sd(as.vector(EPS_s))
plot_predictions <- function()
{
plot(past_dates,EPS_s, type="l",lwd=2,
xlab="t", ylab="x(t)",
xlim=c(DATES[1],DATES[Ntot]),
ylim=c(min(EPS_t),max(EPS_t)),
main="Prediction of the residuals of linear fit")
lines(pred_dates,EPS_p,
col="grey", lwd=3)
lines(pred_dates,arima.pred$pred,
col="red", lwd=3)
lines(pred_dates,arima.pred$pred+2*arima.pred$se,
lwd=2, col="magenta", lty=2)
lines(pred_dates,arima.pred$pred-2*arima.pred$se,
lwd=2, col="magenta", lty=2)
}
suppressWarnings(plot_predictions());
nplots=40
arima.simul <- NULL
eps_i <- function (n)
{
## simple sampling from empirical distribution
## sample(residuals(best.arima), n,replace=TRUE)
## sampling from the smoothed distribution
suppressWarnings(sample(dens.eps$x, n, replace=TRUE,
prob=dens.eps$y))
}
arima.simul <- replicate(100*nplots,
arima.sim(model=arima.model,
n=n_predict,
n.start=1,
rand.gen = eps_i))
plot_simulations <- function()
{
plot(past_dates,EPS_s, type="l",lwd=2,
xlab="t", ylab="x(t)",
xlim=c(DATES[1],DATES[Ntot]),
ylim=c(mu-3*sigma,mu+3*sigma),
main="Simulation of the residuals of linear fit")
for (i in 1:nplots)
{
points(pred_dates[1:n_predict],
arima.simul[1:n_predict,i],
col=sample(colours(),5), lwd=1, pch=20, cex=0.5)
}
lines(pred_dates[1:n_predict],
arima.simul[1:n_predict,1], col="red", lwd=4)
hist(YR_t[Ntot]+arima.simul[n_predict,], nclass=60,
col="lightgreen",
xlab=Response,
main=sprintf("Histogram of simulations of %s for %s",
Response,DATES[Ntot]),xlim=c(0.017,0.02));
abline(v=YH_t[Ntot],col="red",lty=2,lwd=5)
hist(1/(YR_t[Ntot]+arima.simul[n_predict,]), nclass=60,
col="lightblue",
xlab=sprintf("1/%s",Response),
main=sprintf("Histogram of simulations of 1/%s for %s",
Response,DATES[Ntot]),xlim=c(50,60));
abline(v=1/YH_t[Ntot],col="red",lty=2,lwd=5)
}
suppressWarnings(plot_simulations());
## ===================================================
ZR_t <- YR_t[(Npast+1):Ntot]+arima.simul[1:n_predict,1];
plot(DATES,YH_t,
main="Simulations",
type="l", lwd=5, col="red",
xlab="", ylab="")
mtext(Response,side=2,col="red",line=3)
lines(DATES,YOUT, lwd=5, lty=1, col="magenta")
lines(pred_dates[1:n_predict],
ZR_t[1:n_predict],
lwd=5, lty=1, col="darkgreen")
abline(v=as.Date(final_date),col="grey2",lty=2,lwd=3)
grid();
## ===================================================
## plot correlations again
correlations <- cor(X[,Predictors])
print(round(correlations, 3))
OIL dOIL EURUSD GOOG RES REPO METALS
OIL 1.000 0.018 0.921 -0.684 0.948 -0.923 0.912
dOIL 0.018 1.000 0.116 -0.065 0.096 -0.080 0.069
EURUSD 0.921 0.116 1.000 -0.639 0.980 -0.869 0.950
GOOG -0.684 -0.065 -0.639 1.000 -0.647 0.672 -0.658
RES 0.948 0.096 0.980 -0.647 1.000 -0.913 0.958
REPO -0.923 -0.080 -0.869 0.672 -0.913 1.000 -0.867
METALS 0.912 0.069 0.950 -0.658 0.958 -0.867 1.000
library(corrplot)
library(ellipse)
corrplot(correlations,
method="ellipse",
type="full", order="hclust",
tl.pos="nd",
tl.col="black", tl.srt=45,
mar = c(2,2,2,2),oma = c(2,2,2,2),
main= "Correlation between predictor variables")
## ===================================================
# library(GGally)
# PAIRS <- ggpairs(X[,Predictors],
# lower=list(continuous="smooth"),
# params=c(method="loess"))
# show(PAIRS)