correlation.r

Emilio — Jun 22, 2014, 8:03 PM

# ===============================================================================
# CORRELATION.r -> Correlations analysis between EURUSD and Fuel Sales Price
#               by Emilio González González
#               End of Degree Project.  Jan-June 2014
# ===============================================================================
setwd("~/Economic Degree/TrabajoFinGrado")
library("plyr")
library("lattice")
library("ggplot2")
library("xtable")
# Functions used
# ==============================================================================
filter_data <-function(data, limit){
    newdata <- data[data$quantity<limit,]
    return(newdata)
}
graficar <- function(data, variable, titulo="Titulo"){
    attach(data)
    xyplot(variable ~ date, type="l", main=titulo )
    d <-density(variable)
    plot(d)
    polygon(d, col="red", border="blue") 
}
fillNA <- function(data, variable){
    last = 1
    for (i in 1:nrow(data)) {
        if (is.na(data[i,variable]))
            data[i,variable] = last
        else
            last <- data[i,variable]
    }
    return(data)
}
def.par <- par(no.readonly = TRUE) # save default, for resetting...

# Read product, price data and group by date: one record per date and product
# ==============================================================================
data <- read.table("CSVCarburantesFecha.csv", header=TRUE, sep=";", na.strings="NA", dec=",", strip.white=TRUE)
factor.fuels <- levels(data$product)

data <- filter_data(data, 80)
dataD <-ddply(data, c("date","product"), summarise, avgprice=mean(price))
order.date <- order(as.Date(dataD$date, format="%d/%m/%Y"))
dataSorted <- dataD[order.date,]
dataSorted$date <- as.Date(dataSorted$date,  format="%d/%m/%Y")
dataD <-dataSorted

# Read EURUSD data. Note usd will be 1/eurusd
# ==============================================================================
data <- read.csv('http://www.quandl.com/api/v1/datasets/QUANDL/EURUSD.csv?&trim_start=1999-09-06&trim_end=2014-06-02&sort_order=desc', colClasses=c('Date'='Date'))
eurusd <- data[,c("Date","Rate")]
colnames(eurusd) <- c("date", "rate")

# Read Brent data
# ==============================================================================
brent <-read.csv('http://www.quandl.com/api/v1/datasets/DOE/RBRTE.csv?&trim_start=1987-05-20&trim_end=2014-06-02&sort_order=desc', colClasses=c('Date'='Date'))
colnames(brent) <- c("date","brent")

# Merge data and eurusd: (date, product, avgprice, rate)
# ==============================================================================
data <- merge(x = dataD, y = eurusd, all.x=TRUE)
# Fill NAs of Saturdays and Sundays with the last know rate 
data <- fillNA(data, "rate")

# Merge data with brent: (date, product, avgprice, rate)
newdata <- merge(x=data, y=brent, by="date", all.x=TRUE)
newdata <- fillNA(newdata, "brent")
data    <- newdata
rm(newdata, dataSorted, dataD, brent)
# Graph
# ==============================================================================
xyplot(avgprice~ date | product, type="l", data=data, 
       cex.main = 1.5, main="Prices across time", xlab="Date", ylab="Price")

plot of chunk unnamed-chunk-1

diesel <- data[data$product=="DIESEL e+",]
#dev.off()
ggplot(data, aes(x=avgprice, fill=product))+geom_density()

plot of chunk unnamed-chunk-1

graficar(data, rate, "eurusd rate")

plot of chunk unnamed-chunk-1

graficar(diesel, avgprice, "diesel price")
The following objects are masked from data (position 3):

    avgprice, brent, date, product, rate

plot of chunk unnamed-chunk-1

xyplot(avgprice ~ date, data=diesel, type="l", main="Diesel e+ price", )

plot of chunk unnamed-chunk-1

xyplot(rate ~ date, data=diesel, type="l", main="EURUSD")

plot of chunk unnamed-chunk-1

# Correlation and linear Regression price vs eurusd
# ==============================================================================
attach(diesel)
The following objects are masked from data (position 3):

    avgprice, brent, date, product, rate
The following objects are masked from data (position 4):

    avgprice, brent, date, product, rate
cor(avgprice, rate)
[1] -0.2395
cor.test(avgprice,rate)

    Pearson's product-moment correlation

data:  avgprice and rate
t = -4.508, df = 334, p-value = 9.046e-06
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.3379 -0.1360
sample estimates:
    cor 
-0.2395 
fit <- lm(avgprice ~ rate , data=diesel)
summary(fit)

Call:
lm(formula = avgprice ~ rate, data = diesel)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.07610 -0.02711 -0.00105  0.02678  0.06337 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.7754     0.0844   21.04   <2e-16 ***
rate         -0.2929     0.0650   -4.51    9e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.0339 on 334 degrees of freedom
Multiple R-squared:  0.0574,    Adjusted R-squared:  0.0545 
F-statistic: 20.3 on 1 and 334 DF,  p-value: 9.05e-06
fit.table = xtable(fit)
print(fit.table, floating=FALSE)
% latex table generated in R 3.1.0 by xtable 1.7-3 package
% Sun Jun 22 20:03:39 2014
\begin{tabular}{rrrrr}
  \hline
 & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\ 
  \hline
(Intercept) & 1.7754 & 0.0844 & 21.04 & 0.0000 \\ 
  rate & -0.2929 & 0.0650 & -4.51 & 0.0000 \\ 
   \hline
\end{tabular}
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page 
plot(fit)

plot of chunk unnamed-chunk-1

par(def.par)

plot(rate, avgprice, col="blue", xlab="EURUSD rate", ylab="Diesel e+ Price", pch=21)
abline(fit, col="red")

plot of chunk unnamed-chunk-1

# Correlation and linear Regression price vs brent
# ==============================================================================
cor(avgprice, brent)
[1] 0.7727
cor.test(avgprice,brent)

    Pearson's product-moment correlation

data:  avgprice and brent
t = 22.25, df = 334, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.7258 0.8126
sample estimates:
   cor 
0.7727 
fit <- lm(avgprice ~ brent , data=diesel)
summary(fit)

Call:
lm(formula = avgprice ~ brent, data = diesel)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.0580 -0.0159  0.0004  0.0153  0.0576 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.81320    0.02618    31.1   <2e-16 ***
brent        0.00533    0.00024    22.2   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.0222 on 334 degrees of freedom
Multiple R-squared:  0.597, Adjusted R-squared:  0.596 
F-statistic:  495 on 1 and 334 DF,  p-value: <2e-16
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page 
plot(fit)

plot of chunk unnamed-chunk-1

par(def.par)

plot(brent, avgprice, main="Single lineal regression price = f(brent price)", col="blue",xlab="Brent Price in USD", ylab="Diesel e+ Price", pch=21)
abline(fit,col="red")

plot of chunk unnamed-chunk-1

# Correlation and linear Regression price vs eurusd+brent
# ==============================================================================
cor(avgprice, brent/rate)
[1] 0.7827
cor.test(avgprice,brent/rate)

    Pearson's product-moment correlation

data:  avgprice and brent/rate
t = 22.98, df = 334, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.7375 0.8209
sample estimates:
   cor 
0.7827 
fit <- lm(avgprice ~ brent+rate , data=diesel)
summary(fit)

Call:
lm(formula = avgprice ~ brent + rate, data = diesel)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.05677 -0.01462 -0.00146  0.01610  0.06022 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.147007   0.058881   19.48  < 2e-16 ***
brent        0.005267   0.000227   23.19  < 2e-16 ***
rate        -0.251804   0.040284   -6.25  1.3e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.021 on 333 degrees of freedom
Multiple R-squared:  0.639, Adjusted R-squared:  0.637 
F-statistic:  295 on 2 and 333 DF,  p-value: <2e-16
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page 
plot(fit)

plot of chunk unnamed-chunk-1

par(def.par)


# Correlation and linear Regression price vs brent/eurusd
# ==============================================================================
brentEUR <- brent/rate
fit <- lm(avgprice ~ brentEUR , data=diesel)
summary(fit)

Call:
lm(formula = avgprice ~ brentEUR, data = diesel)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.05979 -0.01446  0.00052  0.01729  0.06167 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.876447   0.022594    38.8   <2e-16 ***
brentEUR    0.006165   0.000268    23.0   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.0217 on 334 degrees of freedom
Multiple R-squared:  0.613, Adjusted R-squared:  0.611 
F-statistic:  528 on 1 and 334 DF,  p-value: <2e-16
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page 
plot(fit)

plot of chunk unnamed-chunk-1

par(def.par)
plot(brent/rate, avgprice, xlab="Brent in Euros", col="blue",ylab="Diesel e+ Price", pch=21)
abline(fit, col="red")

plot of chunk unnamed-chunk-1