usdrub regression newest

Igor — Apr 29, 2015, 8:50 PM

rm(list=ls()) # clears out memory

setwd("~/projects")

refresh_data <- F

# Predictors <- c("OIL","OIL2","EURUSD","NI","AL","CU","AU");
# Predictors <- c("OIL","OIL2","EURUSD","METALS");
## Predictors <- c("OIL","REPO","EURUSD","GOOG","RES");
# Predictors <- c("OIL","OIL2","EURUSD","RES", "IMPORT","dOIL");
# Predictors <- c("OIL","dOIL","EURUSD","GOOG","RES","IMPORT","REPO");

Predictors <- c("OIL","dOIL","EURUSD","GOOG","RES","REPO","METALS");
# Predictors <- c("OIL","OIL2")

Response <- "RUBUSD"

start_date="2014-06-01"

final_date="2015-04-14"
#final_date="2015-04-01"
#final_date="2015-04-20"


plot_xaxis="OIL"

BCP=-1.5 # Box-Cox transformation parameter


outliers <- c(135,136,137,138,139,140)





library(moments)

usd.file="./QUANDL_USDRUB.csv"
eur.file="./QUANDL_EURUSD.csv"
oil.file="./QUANDL_OIL.csv"
ni.file="./QUANDL_NI.csv"
al.file="./QUANDL_AL.csv"
cu.file="./QUANDL_CU.csv"
au.file="./QUANDL_AU.csv"


U1="https://www.quandl.com/api/v1/datasets/"
U2=".csv?trim_start="

if(refresh_data)
  {
    url_usd=paste(U1,"CURRFX/USDRUB",U2,start_date,sep="")
    download.file(url_usd, usd.file, "wget")

    url_eur=paste(U1,"CURRFX/EURUSD",U2,start_date,sep="")
    download.file(url_eur, eur.file, "wget")

    url_oil=paste(U1,"OPEC/ORB",U2,start_date,sep="")
    download.file(url_oil, oil.file, "wget")

    url_al=paste(U1,"OFDP/ALUMINIUM_21",U2,start_date,sep="")
    download.file(url_al, al.file, "wget")

    url_ni=paste(U1,"OFDP/NICKEL_41",U2,start_date,sep="")
    download.file(url_ni, ni.file, "wget")

    url_cu=paste(U1,"OFDP/COPPER_6",U2,start_date,sep="")
    download.file(url_cu, cu.file, "wget")

    url_au=paste(U1,"WSJ/AU_EIB",U2,start_date,sep="")
    download.file(url_au, au.file, "wget")
  }

## forex
usd_ <- read.csv(usd.file, header = TRUE, sep = ",")
eur_ <- read.csv(eur.file, header = TRUE, sep = ",")

## energy
oil_ <- read.csv(oil.file, header = TRUE, sep = ",")

## metals
al_ <- read.csv(al.file, header = TRUE, sep = ",")
ni_ <- read.csv(ni.file, header = TRUE, sep = ",")
cu_ <- read.csv(cu.file, header = TRUE, sep = ",")
au_ <- read.csv(au.file, header = TRUE, sep = ",")


## ===================================================

NN=length(usd_$Date)
usd.data <- data.frame(DATE=as.Date(usd_$Date),
                       USDRUB=usd_$Rate,
                       RUBUSD=1/usd_$Rate)

dusd.data <-
  data.frame(DATE=as.Date(usd.data$DATE[-NN]),
             dUSDRUB=diff(usd.data$USDRUB)/usd.data$USDRUB[-NN],
             dRUBUSD=diff(usd.data$RUBUSD)/usd.data$RUBUSD[-NN]);

usd.data <- merge(usd.data, dusd.data, by="DATE");

## ===================================================

NN=length(eur_$Date)

eur.data <- data.frame(DATE=as.Date(eur_$Date),
                       EURUSD=eur_$Rate,
                       USDEUR=1/eur_$Rate);

deur.data <-
  data.frame(DATE=as.Date(eur.data$DATE[-NN]),
             dUSDEUR=diff(eur.data$USDEUR)/eur.data$USDEUR[-NN],
             dEURUSD=diff(eur.data$EURUSD)/eur.data$EURUSD[-NN]);

eur.data <- merge(eur.data, deur.data, by="DATE");

## ===================================================

NN=length(oil_$Date)
oil.data <- data.frame(DATE=as.Date(oil_$Date[-NN]),
                       OIL=oil_$Value[-NN],
                       dOIL=diff(oil_$Value)/oil_$Value[-NN])


al.data <- data.frame(DATE=as.Date(al_$Date), AL=al_$Mid)
ni.data <- data.frame(DATE=as.Date(ni_$Date), NI=ni_$Mid)
cu.data <- data.frame(DATE=as.Date(cu_$Date), CU=cu_$Mid)
au.data <- data.frame(DATE=as.Date(au_$Date), AU=au_$Value)


merged.data <- merge(usd.data, eur.data, by="DATE")
merged.data <- merge(merged.data, oil.data, by="DATE")
merged.data <- merge(merged.data, al.data, by="DATE")
merged.data <- merge(merged.data, ni.data, by="DATE")
merged.data <- merge(merged.data, cu.data, by="DATE")
merged.data <- merge(merged.data, au.data, by="DATE")



##===================================================================
## Plot Crude Oil Prices (sort "Brent")

par(mfrow=c(1,1))
plot(oil.data$DATE, oil.data$OIL,
     main="Oil Prices",
     type="b", pch=16, lwd=2, 
     xlab="Dates",ylab="Oil Prices ($)")
points(oil.data$DATE,oil.data$OIL,
       col="red",lwd=2)
grid()

plot of chunk unnamed-chunk-1

##===================================================================
## Plot Exchange Rate data (USDRUB)

par(mfrow=c(1,1))
plot(usd.data$DATE, usd.data$USDRUB,
     main="Exchange Rate USDRUB",
     type="b", pch=16, lwd=2, 
     xlab="Dates",ylab="USDRUB")
points(usd.data$DATE,usd.data$USDRUB,
       col="red",lwd=2)
grid()

plot of chunk unnamed-chunk-1

##===================================================================
## Plot Exchange Rate data (RUBUSD)

par(mfrow=c(1,1))
plot(usd.data$DATE, usd.data$RUBUSD,
     main="Exchange Rate RUBUSD",
     type="b", pch=16, lwd=2, 
     xlab="Dates",ylab="RUBUSD")
points(usd.data$DATE,usd.data$RUBUSD,
       col="red",lwd=2)
grid()

plot of chunk unnamed-chunk-1

##===================================================================
## Download and plot Reserves of Russian Central Bank

reserves.file <- "ru_cbank_reserves.csv"
reserves.csv <- read.csv(reserves.file, header = TRUE, sep = ",")

reserves_ <- data.frame(DATE=as.Date(reserves.csv$Date,
                                      format="%d/%m/%Y"),
                        Reserves=reserves.csv$Amount);

reserves.interp <- spline(reserves_$DATE,reserves_$Reserves, 
                          xout=usd.data$DATE,method="natural")$y

NN=length(usd.data$DATE)
reserves.data <- data.frame(DATE=usd.data$DATE[-NN],
                            RES=reserves.interp[-NN],
     dRES=diff(reserves.interp)/reserves.interp[-NN])

par(mfrow=c(1,1))
plot(usd.data$DATE,reserves.interp,
     main="International Reserves of Russia",
     type="b", pch=16, lwd=2, 
     xlab="Dates",ylab="Reserves (billions USD)")
grid()
points(reserves_$DATE,reserves_$Reserves,col="red",lwd=4)

plot of chunk unnamed-chunk-1

merged.data <- merge(merged.data, reserves.data, by="DATE")

##===================================================================

imports.file <- "russia_imports.csv"
imports.csv <- read.csv(imports.file, header = TRUE, sep = ",")

imports_ <- data.frame(DATE=as.Date(imports.csv$Date,
                                      format="%d/%m/%Y"),
                       Imports=imports.csv$Imports);

imports.interp <- spline(imports_$DATE,imports_$Imports, 
                         xout=usd.data$DATE,method="natural")$y

NN=length(usd.data$DATE)
imports.data <- data.frame(DATE=usd.data$DATE[-NN],
                            IMPORT=imports.interp[-NN],
     dIMPORT=diff(imports.interp)/imports.interp[-NN])

par(mfrow=c(1,1))
plot(usd.data$DATE,imports.interp,
     main="Imports of Russia",
     type="b", pch=16, lwd=2, 
     xlab="Dates",ylab="Imports (millions USD)")
grid()
points(imports_$DATE,imports_$Imports,col="red",lwd=4)

plot of chunk unnamed-chunk-1

merged.data <- merge(merged.data, imports.data, by="DATE")

##===================================================================
##  http://www.cbr.ru/eng/hd_base/Default.aspx?Prtid=micex_doc

tvolume.file <- "ru_cbank_usdrub_tvolume.csv"

tvolume.csv <- read.csv(tvolume.file, header = TRUE, sep = ",")

tvolume_ <- data.frame(DATE=as.Date(tvolume.csv$Date,
                                      format="%d/%m/%Y"),
                       Tvolume=tvolume.csv$TVol);

tvolume.interp <- approx(tvolume_$DATE,tvolume_$Tvolume, 
                         xout=usd.data$DATE, rule=2,
                         method="constant")$y

NN=length(usd.data$DATE)
tvolume.data <- data.frame(DATE=usd.data$DATE[-NN],
                           TVOL=tvolume.interp[-NN],
     dTVOL=diff(tvolume.interp)/tvolume.interp[-NN])

par(mfrow=c(1,1))
plot(usd.data$DATE,tvolume.interp,
     main="Trade Volume",
     type="h", pch=16, lwd=2, 
     xlab="Dates",ylab="Trade Volume")
grid()
points(tvolume_$DATE,tvolume_$Tvolume,col="red",lwd=4)

plot of chunk unnamed-chunk-1

merged.data <- merge(merged.data, tvolume.data, by="DATE")

## ##===================================================================
## http://www.cbr.ru/Eng/hd_base/Default.aspx?Prtid=swapmonthtotal

fxswap.file <- "ru_cbank_fxswaps_volume.csv"

fxswap.csv <- read.csv(fxswap.file, header = TRUE, sep = ",")

fxswap_ <- data.frame(DATE=as.Date(fxswap.csv$Date,
                                      format="%d/%m/%Y"),
                       Fxswap=fxswap.csv$SWAP.RUB);

fxswap.interp <- approx(fxswap_$DATE,fxswap_$Fxswap, 
                        xout=usd.data$DATE, rule=2,
                        method="constant")$y

fxswap.data <- data.frame(DATE=usd.data$DATE,
                          FXSWAP=fxswap.interp)

par(mfrow=c(1,1))
plot(usd.data$DATE,fxswap.interp,
     main="FX Swap Volume",
     type="h", pch=16, lwd=2, 
     xlab="Dates",ylab="FX Volume")
grid()
points(fxswap_$DATE,fxswap_$Fxswap,col="red",lwd=4)

plot of chunk unnamed-chunk-1

merged.data <- merge(merged.data, fxswap.data, by="DATE")

## ##===================================================================

google.ru.file <- "google_trend_search_rate_russia.csv"
google.ru.csv <- read.csv(google.ru.file, header = TRUE, sep = ",")

google.ru_ <- data.frame(DATE=as.Date(google.ru.csv$Date,
                                      format="%Y-%m-%d"),
                       Google.Ru=google.ru.csv$Searches);

google.ru.interp <- spline(google.ru_$DATE,google.ru_$Google.Ru, 
                         xout=usd.data$DATE,method="natural")$y

NN=length(usd.data$DATE)
google.ru.data <- data.frame(DATE=usd.data$DATE[-NN],
                             GOOG=google.ru.interp[-NN],
     dGOOG=diff(google.ru.interp)/google.ru.interp[-NN])

par(mfrow=c(1,1))
plot(usd.data$DATE,google.ru.interp,
     main="Google Search Trend for 'Russia'",
     type="b", pch=16, lwd=2, 
     xlab="Dates",ylab="Num. of Searches")
grid()
points(google.ru_$DATE,google.ru_$Google.Ru,col="red",lwd=4)

plot of chunk unnamed-chunk-1

merged.data <- merge(merged.data, google.ru.data, by="DATE")

##===================================================================
## data from http://www.cbr.ru/eng/hd_base/Default.aspx?Prtid=repo

repo.rate.file <- "ru_cbank_repo_rates_7d.csv"
repo.rate.csv <- read.csv(repo.rate.file, header = TRUE, sep = ",")

repo.rate_ <- data.frame(DATE=as.Date(repo.rate.csv$Date,
                                      format="%d/%m/%Y"),
                       Repo.Rate=repo.rate.csv$RepoRate);

repo.rate.interp <- approx(repo.rate_$DATE,repo.rate_$Repo.Rate, 
                           xout=usd.data$DATE, rule=2,
                           method="constant")$y

NN=length(usd.data$DATE)
repo.rate.data <- data.frame(DATE=usd.data$DATE,
                             REPO=repo.rate.interp)

par(mfrow=c(1,1))
plot(usd.data$DATE,repo.rate.interp,
     main="Repo Rates",
     type="b", pch=16, lwd=2, 
     xlab="Dates",ylab="Repo Rate")
grid()
points(repo.rate_$DATE,repo.rate_$Repo.Rate,col="red",lwd=4)

plot of chunk unnamed-chunk-1

merged.data <- merge(merged.data, repo.rate.data, by="DATE")

##===================================================================
## data from http://www.levada.ru/indeksy

opinion.file <- "opinion_about_russia.csv"
opinion.csv <- read.csv(opinion.file, header = TRUE, sep = ",")

opinion_ <- data.frame(DATE=as.Date(opinion.csv$Date,
                                      format="%d/%m/%Y"),
                       Opinion=opinion.csv$Right);

opinion.interp <- spline(opinion_$DATE,opinion_$Opinion, 
                         xout=usd.data$DATE,method="natural")$y

NN=length(usd.data$DATE)
opinion.data <- data.frame(DATE=usd.data$DATE[-NN],
                           OPIN=opinion.interp[-NN],
     dOPIN=diff(opinion.interp)/opinion.interp[-NN])

par(mfrow=c(1,1))
plot(usd.data$DATE,opinion.interp,
     main="Opinion of Russians of situation in the country",
     type="b", pch=16, lwd=2, 
     xlab="Dates",ylab="Percentage of optimists")
grid()
points(opinion_$DATE,opinion_$Opinion,col="red",lwd=4)

plot of chunk unnamed-chunk-1

merged.data <- merge(merged.data, opinion.data, by="DATE")



##===================================================================
##===================================================================



X = merged.data[-outliers,]


##===================================================================



FOO <- function(U){1/U}  
BAR <- function(V){1/V} # inverse of FOO



X = data.frame(X, "Y"= (FOO(X[[Response]])^BCP-1)/BCP,
  "OIL2"   = X$OIL^2,
  "METALS" = X$AL/mean(X$AL)+
             X$CU/mean(X$CU)+
             X$NI/mean(X$NI)+
             X$AU/mean(X$AU))

MX <- apply(X[,Predictors],2,mean);

Ntot=dim(X)[1]

Xpast = subset(X,X$DATE < as.Date(final_date))

Npast=dim(Xpast)[1]

##===================================================================
## Plot Prices Of Metals

par(mfrow=c(1,1))
plot(X$DATE, X$METALS,
     main="Prices of metals",
     type="b", pch=16, lwd=2, 
     xlab="Dates",ylab="METALS")
points(X$DATE, X$METALS,
       col="red",lwd=2)
grid()

plot of chunk unnamed-chunk-1

##===================================================================
### Factor analysis:

par(mfrow=c(1,1))
plot(X[,Predictors], col="darkred", main="Predictor Variables")

plot of chunk unnamed-chunk-1

Nfactors=2

fmodel <- factanal(X[,Predictors], Nfactors, rotation="varimax");
cat("Factor model"); print(fmodel)
Factor model

Call:
factanal(x = X[, Predictors], factors = Nfactors, rotation = "varimax")

Uniquenesses:
   OIL   dOIL EURUSD   GOOG    RES   REPO METALS 
 0.060  0.969  0.020  0.519  0.009  0.074  0.074 

Loadings:
       Factor1 Factor2
OIL     0.948   0.205 
dOIL            0.174 
EURUSD  0.867   0.479 
GOOG   -0.691         
RES     0.918   0.386 
REPO   -0.959         
METALS  0.875   0.401 

               Factor1 Factor2
SS loadings      4.654   0.621
Proportion Var   0.665   0.089
Cumulative Var   0.665   0.754

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 31.73 on 8 degrees of freedom.
The p-value is 0.000104 
FLOAD <- t(fmodel$loadings[,1:Nfactors])

barplot(FLOAD,beside=T,col=c("#FF5500","#55AA55"),
        main="Loadings of the Factors",xlab="Component",ylab="A(i)")

legend("bottomright",rownames(FLOAD),
       fill=c("#FF5500","#55AA55"))

plot of chunk unnamed-chunk-1

##===================================================================
#### Prinicipal component analysis

PCA <- prcomp(X[,Predictors])
print(summary(PCA))
Importance of components:
                            PC1      PC2     PC3     PC4     PC5     PC6
Standard deviation     401.3944 38.70358 6.38254 1.33004 0.07477 0.01906
Proportion of Variance   0.9905  0.00921 0.00025 0.00001 0.00000 0.00000
Cumulative Proportion    0.9905  0.99974 0.99999 1.00000 1.00000 1.00000
                           PC7
Standard deviation     0.01649
Proportion of Variance 0.00000
Cumulative Proportion  1.00000
plot(PCA, main="Principal Components")

plot of chunk unnamed-chunk-1

## plot(PCA$rotation[,1], type="h", lwd=4, col="red",
##      ylim=c(-1,1),
##      main="Principal components loadings")
## lines(PCA$rotation[,2], type="h", lwd=4, col="green")
## lines(PCA$rotation[,3], type="h", lwd=4, col="blue")

PCA.DF <- t(PCA$rotation[,1:2])

barplot(PCA.DF,beside=T,col=c("#FF5500","#55AA55"),
        main="Loadings of Principal Components",
        xlab="Component",ylab="A(i)")
abline(0,0,lwd=0.5)

legend("topright",rownames(PCA.DF),
       fill=c("#FF5500","#55AA55"))

plot of chunk unnamed-chunk-1

##===================================================================
### Linear Regression:

Formula <- formula(paste("Y ~ ", paste(Predictors, collapse=" + ")))

LM1=lm(Formula, Xpast)

print(summary(LM1)) #summary of results                                                                                                                       

Call:
lm(formula = Formula, data = Xpast)

Residuals:
       Min         1Q     Median         3Q        Max 
-1.803e-04 -4.415e-05 -4.322e-06  5.438e-05  1.659e-04 

Coefficients:
              Estimate Std. Error  t value Pr(>|t|)    
(Intercept)  6.669e-01  1.936e-04 3445.529  < 2e-16 ***
OIL         -2.920e-05  8.153e-07  -35.808  < 2e-16 ***
dOIL        -8.840e-04  2.810e-04   -3.146  0.00192 ** 
EURUSD       8.249e-04  2.591e-04    3.184  0.00170 ** 
GOOG         2.858e-07  1.765e-08   16.198  < 2e-16 ***
RES         -5.543e-06  7.726e-07   -7.175 1.52e-11 ***
REPO        -2.263e-05  3.728e-06   -6.070 6.64e-09 ***
METALS       2.776e-04  6.440e-05    4.311 2.58e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Residual standard error: 6.797e-05 on 193 degrees of freedom
Multiple R-squared: 0.9928, Adjusted R-squared: 0.9925 
F-statistic:  3785 on 7 and 193 DF,  p-value: < 2.2e-16 
## create plots

par(mfrow=c(1,1))

library(MASS)
if(BCP != 1)
  {
    boxcox(LM1, lambda=seq(-2.5, 4.5, 0.1))
  }

plot of chunk unnamed-chunk-1

par(mfrow=c(1,1))

hist(LM1$residuals,nclass=24, freq=F, col="lightgreen",
     main="Histogram of Residuals", xlab="Residuals")
DENS=density(LM1$residuals)
lines(DENS$x,DENS$y,lwd=5, col="darkblue")

plot of chunk unnamed-chunk-1

par(mfrow=c(1,1))
plot(LM1, which=c(4))

plot of chunk unnamed-chunk-1

par(mfrow=c(2,3))    #drawing in 2 by 2 format                                                                                                                
plot(LM1, which=c(1,2,3,4,5,6))

plot of chunk unnamed-chunk-1

par(mfrow=c(1,1))

tfirst = X$DATE[1]
tlast = X$DATE[Ntot]

xmin <- apply(X,2,min)
xmax <- apply(X,2,max)

MATX <- cbind(1,X[,Predictors]);
YHAT <- LM1$coef %*% t(MATX);

XOUT <- MATX[,plot_xaxis];
YOUT <- BAR((1+BCP*YHAT)^(1/BCP));

dR=max(X[[Response]])-min(X[[Response]]);
ymin=min(X[[Response]])-0.1*dR;
ymax=max(X[[Response]])+0.1*dR;
plot(X[[plot_xaxis]], X[[Response]],
     type="p",col="red",lwd=4,
     xlab=plot_xaxis, ylab=Response,
     ylim=c(ymin,ymax),
     main=sprintf("%s from  %s  to  %s",Response,tfirst,tlast))

grid();
points(XOUT,YOUT, lwd=4, col="magenta")

points(X[Ntot,plot_xaxis],X[Ntot,Response], type = "p", col="darkred", lwd=6, pch = 21)
text(X[Ntot,plot_xaxis],X[Ntot,Response], "â—€ H", adj=-0.2, col="black")

points(XOUT[Ntot],YOUT[Ntot], type = "p", col="darkmagenta", lwd=6, pch = 21)
text(XOUT[Ntot],YOUT[Ntot], "â—€ M",adj=-0.2, col="black")

legend("topleft",inset = 0.05, c("History    ","Model    "), 
       pch=c(1,1), lty=c(0,0), lwd=c(4,4), col=c("red","magenta"))

plot of chunk unnamed-chunk-1

cat("mean=", mean(LM1$residual),"\n")
mean= -6.172331e-22 
cat("st.dev.=", sd(LM1$residual),"\n")
st.dev.= 6.677037e-05 
cat("skewness=", skewness(LM1$residual),"\n")
skewness= -0.0008588909 
cat("kurtosis=", kurtosis(LM1$residual),"\n")
kurtosis= 2.544704 
print(shapiro.test(LM1$residual))

    Shapiro-Wilk normality test

data:  LM1$residual 
W = 0.9931, p-value = 0.4657
library(nortest)
print(ad.test(LM1$residual))

    Anderson-Darling normality test

data:  LM1$residual 
A = 0.4186, p-value = 0.325
## Plot first set of data and draw its axis
DATES=as.Date(X[["DATE"]])
par(mar = c(5, 4, 4, 4) + 0.3)

plot(DATES,X[[Response]],
     main=sprintf("Time Series of %s and %s",
       Response,plot_xaxis),
     axes=FALSE, type="l", lwd=5, col="red",
     xlab="", ylab="")

axis(2, col="red", col.axis="red", las=1)
mtext(Response,side=2,col="red",line=3)

lines(DATES,YOUT, lwd=5, lty=1, col="magenta")

## Allow a second plot on the same graph
par(new=TRUE)

## Plot the second plot and put axis scale on right


plot(DATES,X[[plot_xaxis]],
     axes=FALSE, type="l",lwd=3,lty=1,col="blue", xlab="", ylab="")
axis(4, col="blue", col.axis="blue", las=1)
mtext(plot_xaxis,side=4,line=2.5, col="blue")

box(); 

## Draw the time axis
RNG=seq(DATES[1],DATES[Ntot],by="month")
axis.Date(1,at=RNG,format="%b")
mtext("Date",side=1,col="black",line=3)

abline(v=as.Date(final_date),col="grey2",lty=2,lwd=3)

grid(NULL,NA)

legend("bottomleft",inset = 0.05, c(plot_xaxis,Response,"Model"), 
       lty=c(1,1,1), lwd=c(3,4,5), col=c("blue","red","magenta"))

plot of chunk unnamed-chunk-1

#===================================================================

YH_t  <- X[[Response]];
YR_t  <- YOUT;
EPS_t <- X[[Response]]-YOUT;
EPS_s <- EPS_t[1:Npast];
EPS_p <- EPS_t[(Npast+1):Ntot];

rel_error <- EPS_t/YH_t;

plot(DATES,100*rel_error,type="h",
     lwd=2,pch=20,ylab="Relative error (%)")

plot of chunk unnamed-chunk-1

par(mfrow=c(1,2))

acf(array(EPS_s),
    main="ACF for regression error",
    lwd=3,col="blue",
    xlab="index",ylab="ACF coeff. for error")

pacf(array(EPS_s),
    main="PACF for regression error",
    lwd=3,col="blue",
    xlab="index",ylab="PACF coeff. for error")

plot of chunk unnamed-chunk-1

par(mfrow=c(1,1))

ar.eps <- ar(array(EPS_s),order.max=20);

print(ad.test(ar.eps$resid))

    Anderson-Darling normality test

data:  ar.eps$resid 
A = 2.7608, p-value = 5.656e-07
cat("=====================================\n");
=====================================
cat("Box-Ljung test for residuals of AR fit")
Box-Ljung test for residuals of AR fit
print(Box.test(ar.eps$resid, fitdf=3,lag=10, type="Ljung"))

    Box-Ljung test

data:  ar.eps$resid 
X-squared = 7.9773, df = 7, p-value = 0.3346
AR_FIT <- array(ar.eps$ar);

plot(AR_FIT,
     main="AR model fit for linear regression errors",
     type="h",lwd=8, col="blue",
     xlab="index",ylab="AR model coeff. for error")
abline(0,0)
grid();

plot of chunk unnamed-chunk-1

## EPS_AR <- AR_FIT[1:2] %*% t(cbind(EPS_t[2:Ntot],EPS_t[1:(Ntot-1)]));
## EPS_AR <- c(EPS_t[1], EPS_t[2], EPS_AR[1:(length(EPS_AR)-1)]);
## AR2_RES <- EPS_t-EPS_AR


hist(ar.eps$resid,nclass=24, freq=F, col="grey",
     main="histogram of residuals in AR model for regression errors")

DENS=density(ar.eps$resid[!is.na(ar.eps$resid)])
lines(DENS$x,DENS$y,lwd=5, col="darkblue")

plot of chunk unnamed-chunk-1

plot(DATES,EPS_t, type="b",lwd=2, xlab="t", ylab="x(t)")
## ===================================================
#### prediction with AR model (NOT WORKING)
## predicted.eps  <- predict(ar.eps, n.ahead=(Ntot-Npast))
## eps_mu <- predicted.eps$pred
## pred_dates <-DATES[(Npast+1):Ntot]
## lines(pred_dates,100*eps_mu,col="darkgreen",lwd=4)
## ===================================================



## ===================================================
#### HOLT'S LINEAR METHOD
library(forecast)
Loading required package: parallel
Loading required package: tseries
Loading required package: fracdiff
Loading required package: zoo

Attaching package: 'zoo'

The following object(s) are masked from 'package:base':

    as.Date, as.Date.numeric

Loading required package: Rcpp
Loading required package: RcppArmadillo
Loading required package: colorspace
Loading required package: nnet
Loading required package: caret
Loading required package: lattice
Loading required package: reshape2
Loading required package: plyr
Loading required package: cluster
Loading required package: foreach
This is forecast 4.00 
past_dates <- DATES[1:Npast]
data <- EPS_s;

n_predict=Ntot-Npast
fit1 <- holt(data, alpha=NULL, beta=NULL,
             initial="optimal", h=n_predict, damped=FALSE)
fit3 <- holt(data, alpha=NULL, beta=NULL,
             initial="optimal", h=n_predict, damped=TRUE)

pred_dates <-DATES[(Npast+1):Ntot]
## lines(past_dates,data, type="l", col="darkgreen", lwd=3)

lines(past_dates,fitted(fit1), col="blue", lwd=2)
lines(past_dates,fitted(fit3), col="red", lwd=2)
lines(pred_dates,fit1$mean, col="blue", lwd=2)
lines(pred_dates,fit3$mean, col="red",lwd=2)
abline(v=as.Date(final_date),col="grey2",lty=2,lwd=3)
legend("topleft", lty=1, col=c("black","blue","red"),
       c("Historical Data",
         "Holt's linear trend",
         "Holt's damped trend"))

plot of chunk unnamed-chunk-1

## ===================================================


## ===================================================
#### Loop over different arima models and try to fit
## ===================================================
data <- EPS_s;

pmax <- 3
imax <- 1
qmax <- 4

cat("=====================================\n");
=====================================
cat("Choosing the best ARIMA(p,i,q) model ...\n");
Choosing the best ARIMA(p,i,q) model ...
cat("=====================================\n");
=====================================
best.order<-c(0,0,0)
best.aic<-Inf
for (p in 0:pmax) for (i in 0:imax) for (q in 0:qmax)
{

  fit.arima <-arima(data, order=c(p,i,q));
  fit.aic<-fit.arima$aic;

  if (fit.aic < best.aic)
    {
      best.order <- c(p,i,q);
      best.arima <- fit.arima;
      best.aic <-fit.aic;
    }
}

cat("The best model is ARIMA(",
    best.order[1],",",
    best.order[2],",",
    best.order[3],")\n");
The best model is ARIMA( 1 , 0 , 2 )
cat("AIC for this model is",best.aic,"\n");
AIC for this model is -2816.202 
cat("=====================================\n");
=====================================
hist(residuals(best.arima), nclass=60, freq=F, col="violet",
     main="Histogram of residuals for ARIMA(p,i,q) model")
dens.eps=density(residuals(best.arima))
lines(dens.eps$x,dens.eps$y,lwd=5, col="darkblue")

plot of chunk unnamed-chunk-1

print(ad.test(residuals(best.arima)))

    Anderson-Darling normality test

data:  residuals(best.arima) 
A = 2.2879, p-value = 8.137e-06
cat("=====================================\n");
=====================================
cat("Box-Ljung test for residuals of ARIMA fit")
Box-Ljung test for residuals of ARIMA fit
print(Box.test(residuals(best.arima),
               fitdf=best.order[1]+best.order[3],
               lag=10, type="Ljung"))

    Box-Ljung test

data:  residuals(best.arima) 
X-squared = 1.5043, df = 7, p-value = 0.9822
## arima prediction
arima.pred <- predict(best.arima , n.ahead=n_predict)

## arima simulation

arima.model <- as.list(coef(best.arima))

mu=mean(as.vector(EPS_s))
sigma=sd(as.vector(EPS_s))




plot_predictions <- function()
  {
    plot(past_dates,EPS_s, type="l",lwd=2,
         xlab="t", ylab="x(t)",
         xlim=c(DATES[1],DATES[Ntot]),
         ylim=c(min(EPS_t),max(EPS_t)),
         main="Prediction of the residuals of linear fit")
    lines(pred_dates,EPS_p,
          col="grey", lwd=3)
    lines(pred_dates,arima.pred$pred,
          col="red", lwd=3)
    lines(pred_dates,arima.pred$pred+2*arima.pred$se,
          lwd=2, col="magenta", lty=2)
    lines(pred_dates,arima.pred$pred-2*arima.pred$se,
          lwd=2, col="magenta", lty=2)

  }
suppressWarnings(plot_predictions());

plot of chunk unnamed-chunk-1

nplots=40
arima.simul <- NULL



eps_i <- function (n)
  {
    ## simple sampling from empirical distribution
    ## sample(residuals(best.arima), n,replace=TRUE)

    ## sampling from the smoothed distribution
    suppressWarnings(sample(dens.eps$x, n, replace=TRUE,
                            prob=dens.eps$y))
  }


arima.simul <- replicate(100*nplots,
                     arima.sim(model=arima.model,
                               n=n_predict,
                               n.start=1,
                               rand.gen = eps_i))

plot_simulations <- function()
  {
    plot(past_dates,EPS_s, type="l",lwd=2,
         xlab="t", ylab="x(t)",
         xlim=c(DATES[1],DATES[Ntot]),
         ylim=c(mu-3*sigma,mu+3*sigma),
         main="Simulation of the residuals of linear fit")

    for (i in 1:nplots)
      {
        points(pred_dates[1:n_predict],
               arima.simul[1:n_predict,i],
              col=sample(colours(),5), lwd=1, pch=20, cex=0.5)
      }

    lines(pred_dates[1:n_predict],
          arima.simul[1:n_predict,1], col="red", lwd=4)

    hist(YR_t[Ntot]+arima.simul[n_predict,], nclass=60,
         col="lightgreen",
         xlab=Response,
         main=sprintf("Histogram of simulations of %s for %s",
           Response,DATES[Ntot]),xlim=c(0.017,0.02));
    abline(v=YH_t[Ntot],col="red",lty=2,lwd=5)

    hist(1/(YR_t[Ntot]+arima.simul[n_predict,]), nclass=60,
         col="lightblue",
         xlab=sprintf("1/%s",Response),
         main=sprintf("Histogram of simulations of 1/%s for %s",
           Response,DATES[Ntot]),xlim=c(50,60));
    abline(v=1/YH_t[Ntot],col="red",lty=2,lwd=5)

  }
suppressWarnings(plot_simulations());

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

## ===================================================






ZR_t <- YR_t[(Npast+1):Ntot]+arima.simul[1:n_predict,1];

plot(DATES,YH_t,
     main="Simulations",
     type="l", lwd=5, col="red",
     xlab="", ylab="")

mtext(Response,side=2,col="red",line=3)

lines(DATES,YOUT, lwd=5, lty=1, col="magenta")
lines(pred_dates[1:n_predict],
      ZR_t[1:n_predict],
      lwd=5, lty=1, col="darkgreen")
abline(v=as.Date(final_date),col="grey2",lty=2,lwd=3)
grid();

plot of chunk unnamed-chunk-1

## ===================================================
## plot correlations again

correlations <- cor(X[,Predictors])
print(round(correlations, 3))
          OIL   dOIL EURUSD   GOOG    RES   REPO METALS
OIL     1.000  0.018  0.921 -0.684  0.948 -0.923  0.912
dOIL    0.018  1.000  0.116 -0.065  0.096 -0.080  0.069
EURUSD  0.921  0.116  1.000 -0.639  0.980 -0.869  0.950
GOOG   -0.684 -0.065 -0.639  1.000 -0.647  0.672 -0.658
RES     0.948  0.096  0.980 -0.647  1.000 -0.913  0.958
REPO   -0.923 -0.080 -0.869  0.672 -0.913  1.000 -0.867
METALS  0.912  0.069  0.950 -0.658  0.958 -0.867  1.000
library(corrplot)
library(ellipse)
corrplot(correlations,
         method="ellipse",
         type="full", order="hclust",
         tl.pos="nd",
         tl.col="black", tl.srt=45,
         mar = c(2,2,2,2),oma = c(2,2,2,2),
         main= "Correlation between predictor variables")

plot of chunk unnamed-chunk-1

## ===================================================
# library(GGally)
# PAIRS <- ggpairs(X[,Predictors], 
#         lower=list(continuous="smooth"),
#         params=c(method="loess"))
# show(PAIRS)