Problem 3.

preamble

# Start from a clean slate: emit a form-feed character (RStudio treats it as
# "clear console") and remove every object from the global workspace.
cat("\014")

rm(list = ls())

# Make sure pacman is available, then let it install (if needed) and attach
# every package used below.
if (!require(pacman)) {
  install.packages('pacman')
}
## Loading required package: pacman
pacman::p_load(
  quantmod, lattice, timeSeries, rugarch, MASS, neuralnet, ppcor, locfit,
  bootstrap, bisoreg, xts, nloptr, zoo, reshape2, ggplot2, dplyr, fBasics,
  dygraphs
)
  1. Convert price data into 1 minute log-returns data
# Read the raw file. Column 1 holds the trading date (month/day/year) and
# columns 2..393 are kept as the per-day data row.
data <- read.csv("./dataHW5csv.csv", header = TRUE)
data_df <- data.frame(data[, 2:393])
row.names(data_df) <- data[, 1]

# Convert to an xts object indexed by trading date. Rows with missing values
# are kept on purpose; the complete.cases() filter below stays disabled.
#data_df <- data_df[complete.cases(data_df),]
data_df <- xts(
  data_df,
  order.by = as.Date(row.names(data_df), format = "%m/%d/%Y")
)

# 1-minute log-returns in percent: difference of log prices between adjacent
# minute columns. Columns 2..392 of data_df are the same columns of the raw
# file (3..393) that the OLHC step uses as prices.
minute_log_prices <- log(coredata(data_df[, 2:392]))
n_minutes <- ncol(minute_log_prices)
returns1min <- 100 * (
  minute_log_prices[, -1, drop = FALSE] -
    minute_log_prices[, -n_minutes, drop = FALSE]
)

# The index of data_df is already a Date vector, so it is reused directly.
# (The old call passed `format=` to xts() instead of as.Date(), where it was
# silently attached to the result as a meaningless attribute.)
returns1min <- xts(returns1min, order.by = index(data_df))
  1. On every day estimate the partial autocorrelation function of order 1 to 10
# Daily partial autocorrelations of the 1-minute returns for lags 1..10.
# The PACF is estimated once per day and all ten lags are kept, instead of
# re-estimating the same PACF once for every lag. The intermediate result has
# its own name so that the function stats::pacf() is not shadowed while it is
# still being called.
max_order <- 10
daily_pacf <- apply.daily(returns1min, function(x) {
  # na.pass lets days with missing minutes through instead of raising an error
  stats::pacf(
    as.numeric(x),
    lag.max = max_order,
    plot = FALSE,
    na.action = na.pass
  )$acf[seq_len(max_order)]
})
colnames(daily_pacf) <- paste0("order", seq_len(max_order))

# One row per day, one column per lag (order1 ... order10).
pacf <- as.data.frame(daily_pacf)
  1. Plot the box-plots of these partial autocorrelation functions for every order
# Reshape to long format (one row per day/lag pair) so that all ten lags can
# be drawn in a single panel.
d_melt <- melt(pacf, id.vars = NULL)

# One box per lag, with mean +/- 1 standard deviation overlaid as a white
# point-range. geom_boxplot() has no `trim` argument (it belongs to
# geom_violin()), so the previously ignored `trim` setting has been removed.
ggplot(d_melt, aes(x = variable, y = value)) +
  geom_boxplot(fill = "lightblue", outlier.colour = "pink") +
  stat_summary(
    fun.data = "mean_sdl",
    fun.args = list(mult = 1),
    geom = "pointrange",
    size = 0.3,
    color = "white"
  ) +
  theme_minimal() +
  theme(axis.title.x = element_blank(), axis.title.y = element_blank())
## Warning: Removed 220 rows containing non-finite values (stat_boxplot).
## Warning: Removed 220 rows containing non-finite values (stat_summary).

Yes, the plot shows that the first-order partial autocorrelation is generally negative on a typical day in the sample.

  1. Compute open, low, high, close price for every day
# Minute-by-minute prices: one row per trading day, taken from columns 3..393
# of the raw file and labelled by the date in column 1.
minute_prices <- as.matrix(data[, 3:393])
day_labels <- as.character(data[, 1])

# Low and high of a single day. A day without any quote yields NA for both,
# rather than the Inf / -Inf (plus warnings) that min() / max() return when
# every value has been removed by na.rm.
day_range <- function(x) {
  if (all(is.na(x))) {
    return(c(NA_real_, NA_real_))
  }
  range(x, na.rm = TRUE)
}
low_high <- t(apply(minute_prices, 1, day_range))

# Daily open / low / high / close. Open and close are simply the first and
# last minute columns, so they are NA when that minute is missing. The dates
# are set as row names explicitly because later steps read them back.
prices1min <- data.frame(
  open = unname(minute_prices[, 1]),
  low = unname(low_high[, 1]),
  high = unname(low_high[, 2]),
  close = unname(minute_prices[, ncol(minute_prices)]),
  row.names = day_labels
)
  1. Estimate the daily volatilities using the OLHC estimator and plot it over time
# Daily variance from open/low/high/close prices, combining an overnight
# (previous close to open) part and an intraday (high/low/close) part.
# NOTE(review): the formula is applied to price levels, exactly as before;
# confirm whether the assignment intends log prices instead.

# Number of days. This used to be stored in `T`, which overwrote the TRUE
# shorthand for the rest of the session.
n_days <- nrow(prices1min)

# Weight that splits the variance between the overnight and intraday parts.
# Presumably the fraction of the day the market is closed -- TODO confirm.
f <- 0.66
O <- prices1min$open
H <- prices1min$high
L <- prices1min$low
C <- prices1min$close

# Intraday component (sigma-tilde squared), one value per day.
sigmattilde2 <- 0.5 * (H - L)^2 - (2 * log(2) - 1) * (C - O)^2

# Full estimator (sigma-hat squared). Each day pairs its own open with the
# previous day's close, so the first day has no estimate. Negative indexing
# (rather than 2:n_days) also stays correct for a one-day sample.
sigmathat2 <- 0.12 * (O[-1] - C[-n_days])^2 / f +
  0.88 * sigmattilde2[-1] / (1 - f)

# Date-indexed series starting on the second day; the column keeps the name
# "sigmathat2" so the plot legend is unchanged.
volatilities <- xts(
  cbind(sigmathat2 = sigmathat2),
  order.by = as.Date(row.names(prices1min)[-1], format = "%m/%d/%Y")
)

# Interactive plot of the volatility estimates over time.
dygraph(volatilities)
  1. Plot the volatility estimates from (e) against the corresponding first-order partial autocorrelation from (b).
# Pair each day's first-order partial autocorrelation with the volatility
# estimate of the same day. The first day is dropped from the PACF because
# the volatility series only starts on the second day.
vol_pacf <- data.frame(
  pacf = pacf$order1[-1],
  volatility = as.numeric(volatilities)
)

# Scatter plot of volatility against PACF. The (pacf, volatility) pairs have
# no natural ordering, so joining them with geom_line() drew a path through
# the days sorted by PACF value that carried no meaning.
# na.rm = TRUE drops days where either value is missing without a warning
# (the previous run warned about 22 such rows).
ggplot(vol_pacf, aes(x = pacf, y = volatility)) +
  geom_point(color = "blue", na.rm = TRUE) +
  theme_minimal()