# Load the trade-and-quote sample, give its columns short names and turn the
# first column into full POSIXct time stamps.
# NOTE(review): setwd() to an absolute, machine-specific path makes the script
# non-portable; it is kept because the relative file name below depends on it.
setwd("D:/my/documents/UvA/Financial Markets/Assignment 1")
data1 <- read.csv("data1.csv", stringsAsFactors = FALSE)
names(data1) <- c("time", "trade", "price", "direction", "bid", "ask") # simple names
# Prepend the trading date to every entry of the time column in one vectorised
# call (no row-by-row loop), then convert the strings to POSIXct.
data1[, 1] <- as.POSIXct(paste("2016-09-08", data1[, 1]))
head(data1, 3)
## time trade price direction bid ask
## 1 2016-09-08 09:06:04 20 66.70 -1 66.90 67.00
## 2 2016-09-08 09:06:11 25 66.64 -1 66.65 66.70
## 3 2016-09-08 09:06:26 18 66.60 -1 66.60 66.65
# Transaction price over the trading day.
plot(
  x = data1$time, y = data1$price,
  type = "l", lty = 1, col = "red",
  xlab = "time", ylab = "price"
)

# Average quoted spread in three forms: in price units, relative to the
# midquote, and as a difference of log quotes.
means_a <- with(data1, {
  quoted <- ask - bid
  mid <- (ask + bid) / 2
  data.frame(
    absolute = mean(quoted),
    relative = mean(quoted / mid),
    logs = mean(log(ask) - log(bid))
  )
})
means_a
## absolute relative logs
## 1 0.1061657 0.001608698 0.0016087
We can see that the average absolute spread is not large: it equals 0.106 euros. The relative and the log spreads, on the other hand, are almost identical. That is the case because a difference of logs approximates the percentage change when the change is small. In any case, this agreement is a useful check that the results are correct.
# Average quoted (absolute) spread per half-hour interval of the trading day.
#
# The interval boundaries are built on the same calendar date as the trades
# (taken from the first time stamp). Boundaries on any other date match no
# trade at all, which leaves every interval mean as NaN.
trading_day <- format(data1$time[1], "%Y-%m-%d")
# 18 boundaries (09:00, 09:30, ..., 17:30) delimiting 17 half-hour intervals.
half_hour_breaks <- as.POSIXct(paste(trading_day, "08:30:00")) + (1:18) * 30 * 60

data1$absolute <- data1$ask - data1$bid

# findInterval() assigns each trade to the half-open interval [start, end), so
# a trade stamped exactly on a boundary is counted once instead of being
# dropped. 0 means "before the first boundary", 18 "at or after the last".
interval_id <- findInterval(as.numeric(data1$time), as.numeric(half_hour_breaks))

# One row per interval, labelled by the interval start. An interval without
# trades gets NaN (mean of an empty vector) rather than a hard-coded value.
result <- data.frame(
  time = half_hour_breaks[1:17],
  means = vapply(
    1:17,
    function(k) mean(data1$absolute[interval_id == k]),
    numeric(1)
  )
)
# NOTE(review): `result` was previously overwritten at this point with the
# cached file "hourData.RDS", which hid the empty intervals. The plot now uses
# the values computed from data1; confirm it matches the cached figure.
plot(result$time, result$means, type = "l", col = "dark green", xlab = "",
     ylab = "average half hour s")
The average half-hour absolute spread can be seen in the previous graph. A priori, we would expect the spread to be smaller when liquidity is deeper. Depending on when that happens, we could expect different intraday patterns in the spread. Here we only observe a relative spike in the middle of the day; perhaps trading was thinner at that time, resulting in a larger value of S.
# Effective half-spread of every trade from the second one onwards, measured
# against the midquote of the preceding observation:
#   direction_t * (price_t - mid_{t-1}).
# Negative indices select "all but the first" / "all but the last" row
# explicitly; the former `1:n-1` only worked because R parses it as
# `(1:n) - 1` and silently drops the resulting index 0.
trade_sign <- data1$direction[-1]
trade_price <- data1$price[-1]
previous_mid <- ((data1$ask + data1$bid) / 2)[-nrow(data1)]

absoluteHalfSpread <- trade_sign * (trade_price - previous_mid)
relativeHalfSpread <- absoluteHalfSpread / previous_mid
logHalfSpread <- trade_sign * (log(trade_price) - log(previous_mid))

# One-row summary table with the three averages.
means_b <- data.frame(
  mean(absoluteHalfSpread),
  mean(relativeHalfSpread),
  mean(logHalfSpread)
)
names(means_b) <- c("absolute half-spread", "relative half-spread", "logs half-spread")
means_b
## absolute half-spread relative half-spread logs half-spread
## 1 0.06420849 0.0009727276 0.0009728108
The average effective half-spread can be seen above. Noticeably, when we double it, it is higher than the value obtained in (a).
# Running volume-weighted average price (VWAP) over all trades, and separately
# over buyer-initiated (direction == 1) and seller-initiated (direction == -1)
# trades.
VWAP <- cumsum(data1$trade * data1$price) / cumsum(data1$trade)

buy_initiated <- subset(data1, direction == 1)
buy_initiated$VWAP_buy <- cumsum(buy_initiated$trade * buy_initiated$price) /
  cumsum(buy_initiated$trade)

sell_initiated <- subset(data1, direction == -1)
sell_initiated$VWAP_sell <- cumsum(sell_initiated$trade * sell_initiated$price) /
  cumsum(sell_initiated$trade)

# Size the y-axis from all three curves; with the default range (overall VWAP
# only) the buy and sell curves can fall outside the plotting region.
vwap_range <- range(VWAP, buy_initiated$VWAP_buy, sell_initiated$VWAP_sell,
                    finite = TRUE)
plot(data1$time, VWAP, type = "l", xlab = "", ylim = vwap_range)
lines(buy_initiated$time, buy_initiated$VWAP_buy, col = "green")
lines(sell_initiated$time, sell_initiated$VWAP_sell, col = "blue")
# All three curves are drawn as plain solid lines, so the legend shows solid
# lines without point symbols.
legend(
  x = "topright",
  legend = c("buy init", "VWAP", "sell init"),
  col = c("green", "black", "blue"),
  lwd = 1, lty = 1, cex = .6
)
The graphs of the VWAP and of the seller- and buyer-initiated VWAP can be seen above. The conclusion is that the VWAP is much smoother than the price itself and shows a kind of direction or trend. Some traders use this indicator as part of technical analysis – i.e. they look at whether the price is above or below the VWAP.
# First-order autocovariance of a series: the sample covariance between
# vec[2], ..., vec[n] and vec[1], ..., vec[n - 1].
#
# vec: numeric vector with at least two elements.
# Returns a single number (NA when only one overlapping pair exists).
#
# Negative indices drop the first / last element explicitly; the former
# `vec[1:length(vec)-1]` parsed as `vec[(1:length(vec)) - 1]` and relied on R
# ignoring the index 0.
cov_diff <- function(vec) {
  stopifnot(length(vec) >= 2)
  cov(vec[-1], vec[-length(vec)])
}
# Roll's spread estimate from the observed trade prices, in price units and in
# logs: spread = 2 * sqrt(-cov(delta p_t, delta p_{t-1})).
# diff() gives the successive price changes directly.
deltaPinEuro <- diff(data1$price)
deltaPinLog <- diff(log(data1$price))

# Autocovariance of the price changes and the spread estimate derived from it.
# sqrt() yields NaN (with a warning) when the autocovariance is positive, in
# which case the estimate is undefined.
roll_summary <- function(delta) {
  autocov <- cov_diff(delta)
  c(autocov, 2 * sqrt(-autocov))
}

est_spread <- data.frame(
  c("covariance", "estimated spread"),
  roll_summary(deltaPinEuro),
  roll_summary(deltaPinLog)
)
names(est_spread) <- c("name", "absolute terms", "log terms")
est_spread
## name absolute terms log terms
## 1 covariance -0.002755835 -6.338389e-07
## 2 estimated spread 0.104992087 1.592280e-03
The estimates of the average absolute spread and the relative spread (seen above) are very close to the actual values computed in part (a). That could be interpreted as follows: despite the many assumptions about the stochastic process, the value we obtain is indeed quite accurate. This serves as a kind of test of the validity of Roll’s formula.
set.seed(1347) # ensuring reproducibility

# Simulate n transaction prices p_t = m_t + c * d_t.
#
#   n    number of prices to simulate (a single number >= 1)
#   c    amount added to or subtracted from the underlying price m_t,
#        depending on the trade direction d_t
#   z    size of each step of m_t (+z or -z, each with probability 1/2)
#   m_0  starting level of m_t
#
# m_t starts at m_0 and moves by z_t[t] from t = 2 onwards (z_t[1] is drawn but
# not applied). The directions d_t (+1 or -1) are drawn afterwards. The draws
# are requested in the same order as before (all n steps first, then all n
# directions), only in two vectorised calls instead of 2 * n single ones.
#
# Returns a numeric vector of exactly n prices.
run_sim <- function(n, c, z, m_0) {
  stopifnot(length(n) == 1, n >= 1)
  z_t <- ifelse(rbinom(n, 1, .5) == 1, z, -z)
  d_t <- ifelse(rbinom(n, 1, .5) == 1, 1, -1)
  # Accumulate the path from m_0 using steps 2..n only. The former loop read
  # z_t[i + 1] up to i = n, i.e. one past the end of z_t, which appended an NA
  # as element n + 1 of the result.
  steps <- z_t
  steps[1] <- m_0
  m_t <- cumsum(steps)
  m_t + c * d_t
}
# Three simulated price paths of increasing length, all with c = 2, z = 0.5
# and a starting level of 100. They share one random stream, so the order of
# the calls is kept.
simulation1000 <- run_sim(n = 1000, c = 2, z = .5, m_0 = 100)
simulation5000 <- run_sim(n = 5000, c = 2, z = .5, m_0 = 100)
simulation10000 <- run_sim(n = 10000, c = 2, z = .5, m_0 = 100)

# Shortest path drawn as a red line.
plot(simulation1000, type = "l", col = "red", xlab = "")
The graph of the simulation with 1000 observations looks like a random walk, or like a typical chart from a stock market. The differences, on the other hand, look like white noise – totally random. That is consistent with the way we set up the simulation, so at least everything looks as planned.
We cannot use the same realisation of the rand() function (or in this case rbinom()) for m_t and d_t: the price innovations and the trade directions would then be perfectly dependent instead of independent, so we would lose the “randomness”, and the data would not be a random walk.
# Roll estimates from the three simulated paths, compared with the spread of 4
# (2 * c with c = 2) that the simulations were run with.
true_spread <- 4

# First n - 1 price changes of a path simulated with length n. Selecting them
# by position leaves out a trailing NA change if the path carries one.
usable_changes <- function(path, n) {
  diff(path)[seq_len(n - 1)]
}

difference <- diff(simulation10000)

# Each quantity is computed once; the 3 x 1 matrix shape of est_cov,
# est_spread_sim and percentage_diff is kept.
est_cov <- rbind(
  cov_diff(usable_changes(simulation1000, 1000)),
  cov_diff(usable_changes(simulation5000, 5000)),
  cov_diff(usable_changes(simulation10000, 10000))
)
est_spread_sim <- 2 * sqrt(-est_cov)
s1000 <- est_spread_sim[1, 1]
s5000 <- est_spread_sim[2, 1]
s10000 <- est_spread_sim[3, 1]
# Deviation from the true spread, expressed relative to the estimate itself.
percentage_diff <- (est_spread_sim - true_spread) / est_spread_sim

# Build the table column by column so the numeric columns stay numeric;
# cbind()-ing them with the character labels turned every value into a string.
est_spread_sim_table <- data.frame(
  c("1000 simulations", "5000 simulations", "10000 simulations"),
  as.vector(est_cov),
  as.vector(est_spread_sim),
  as.vector(percentage_diff)
)
names(est_spread_sim_table) <- c("name", "covariance", "estimated spread", "percentage diff")
est_spread_sim_table
## name covariance estimated spread
## 1 1000 simulations -3.58683264221522 3.78778702791761
## 2 5000 simulations -3.96543544373923 3.98268022504405
## 3 10000 simulations -4.05522847092175 4.02751956962185
## percentage diff
## 1 -0.0560255818287284
## 2 -0.00434877368437425
## 3 0.00683288290624948
With the simulation we confirm again that Roll’s estimate is a good measure of the spread. The results are very close to the “true” value of the spread parameter, 4. It can also clearly be seen that with more simulated observations the percentage difference shrinks in absolute value (from about 5.6% to below 1%), so the estimate becomes even better. So the estimator obeys the usual statistical rules.
# OLS regression of each element of `difference` on the element before it;
# only the fitted coefficients (intercept and slope) are extracted.
# `1:length(difference)-1` parses as `(1:length(difference)) - 1`, i.e. the
# indices 0..(n - 1); R ignores the index 0, so the regressor is `difference`
# without its last element.
lm(difference[2:length(difference)]~difference[1:length(difference)-1])$coef
## (Intercept) difference[1:length(difference) - 1]
## 0.005142184 -0.482692958
The coefficient we get for beta is -0.48, which is consistent with the theory that changes in prices are negatively correlated over time. When we vary the parameters c and z, we get different results. When c becomes smaller, beta also becomes smaller in absolute value, which means that the relation between successive price changes weakens. One interpretation could be that the spread is so small that there is little relation between changes in prices. On the other hand, as z becomes larger, beta also declines in absolute value, i.e. the relation between the changes in prices also weakens. Again, one interpretation could be that the more prices fluctuate, the weaker the dependence between successive price changes delta p.