Preliminaries

# Send a form-feed character to the console and empty the workspace.
cat("\014")

rm(list = ls())

# Make sure pacman is available, then let it install/load every package used below.
if (!require(pacman)) {
  install.packages("pacman")
}
## Loading required package: pacman
pacman::p_load(
  xts, nloptr, zoo, dygraphs, plotly, magrittr, webshot,
  fBasics, dplyr, DescTools, ggplot2
)

Using the CLOSE prices for all the stocks, compute the log-returns.

#load all data and create variables (stockname, date, and returns)
ticker <- read.table('./data/Tickers.txt', header=FALSE)

# Read one stock's price file (a single read per file) and return a data frame
# with the stock NAME, the DATE (column 1 of the file) and the percentage
# log RETURNS computed from column 5 (presumably the CLOSE price - confirm
# against the data files).
# The first return of every stock is NA by construction, so returns are never
# computed across two different stocks.
read_stock_returns <- function(X) {
  px <- read.csv(paste0('./data/', X, ".txt"))
  data.frame(NAME = basename(X),
             DATE = px[, 1],
             RETURNS = c(NA, 100 * diff(log(px[, 5]), lag = 1)))
}

# The first entry of the ticker file is skipped, as before (presumably a
# header line - verify against Tickers.txt). Negative indexing stays correct
# even when the file holds a single row, unlike 2:nrow.
stock_names <- as.character(ticker$V1[-1])
prices <- do.call(rbind, lapply(stock_names, read_stock_returns))

#omit NA values
returns <- na.omit(prices)

For all the log-returns, estimate the sample mean, variance, standard deviation, median, skewness, and excess kurtosis.

#compute the summary statistics (mean, skewness, kurtosis and their CIs)

# One row of statistics for a single stock's return vector x.
# Each estimator is evaluated exactly once: Skew()/Kurt() with ci.type='norm'
# build their intervals by bootstrapping, so calling them separately for the
# estimate, the lower and the upper bound would take the two bounds from
# different resampling runs (and triple the cost).
describe_returns <- function(x) {
  bs <- basicStats(x)
  sk <- Skew(x, conf.level = 0.95, ci.type = 'norm')
  ku <- Kurt(x, conf.level = 0.95, ci.type = 'norm')
  data.frame(
    # basicStats rows: 7 = Mean, 8 = Median, 11 = LCL Mean, 12 = UCL Mean,
    # 13 = Variance, 14 = Stdev
    mean     = bs[7, 1],
    mean.lci = bs[11, 1],
    mean.uci = bs[12, 1],
    mediann  = bs[8, 1],
    variance = bs[13, 1],
    # Skew()/Kurt() return c(estimate, lower CI, upper CI)
    skew     = sk[[1]],
    skew.lci = sk[[2]],
    skew.uci = sk[[3]],
    kurt     = ku[[1]],
    kurt.lci = ku[[2]],
    kurt.uci = ku[[3]],
    # standard deviation, appended last so existing column positions are kept
    sd       = bs[14, 1]
  )
}

# One summary row per stock; the data frame returned by the helper is
# unpacked into columns by summarise().
summary <- returns %>%
  group_by(NAME) %>%
  summarise(describe_returns(RETURNS))

Provide a plot of the estimated means together with 95% confidence intervals. How many means are significantly different from zero at 5% significance level?

#create a stock index (one running number per summarised stock)
summary$stock <- seq_len(nrow(summary))

#plot the mean and its confidence interval ribbon
ggplot(summary,aes(stock)) + theme_minimal() +
  geom_line(aes(y=mean), colour="blue") + 
  geom_ribbon(aes(ymin=mean.lci, ymax=mean.uci), alpha=0.15)

# A stock's mean is significantly different from zero at the 5% level exactly
# when its own 95% confidence interval excludes zero. (A t-test on the vector
# of 100 means would instead test the cross-sectional average of the means.)
mean_sig <- summary$mean.lci > 0 | summary$mean.uci < 0
table(mean_sig)
## The TRUE count is the number of means that are significantly different
## from zero at 5% significance level (re-run to obtain the counts).

Provide a plot of the estimated skewness together with 95% confidence intervals. How many skewness estimates are significantly different from zero at the 5% significance level?

#plot the skewness and its confidence interval ribbon
ggplot(summary,aes(stock)) + theme_minimal() +
  geom_line(aes(y=skew), colour="red") + 
  geom_ribbon(aes(ymin=skew.lci, ymax=skew.uci), alpha=0.15)

# A stock's skewness is significantly different from zero at the 5% level
# exactly when its own 95% confidence interval excludes zero. (A t-test on the
# vector of skewness estimates would test their cross-sectional average.)
skew_sig <- summary$skew.lci > 0 | summary$skew.uci < 0
table(skew_sig)
## The TRUE count is the number of skewness estimates that are significantly
## different from zero at 5% significance level (re-run to obtain the counts).

Provide a plot of the estimated excess kurtosis together with 95% confidence intervals. How many excess kurtosis estimates are significantly different from zero at the 5% significance level?

#plot the kurtosis and its confidence interval ribbon
ggplot(summary,aes(stock)) + theme_minimal() +
  geom_line(aes(y=kurt), colour="green") + 
  geom_ribbon(aes(ymin=kurt.lci, ymax=kurt.uci), alpha=0.15)

# A stock's excess kurtosis is significantly different from zero at the 5%
# level exactly when its own 95% confidence interval excludes zero. (A t-test
# on the vector of kurtosis estimates would test their cross-sectional average.)
kurt_sig <- summary$kurt.lci > 0 | summary$kurt.uci < 0
table(kurt_sig)
## The TRUE count is the number of excess kurtosis estimates that are
## significantly different from zero at 5% significance level (re-run to
## obtain the counts).

Assume that there are 252 trading days in every year, and use this number to annualize the estimates of the daily mean return and daily standard deviation.

#calculate annualized daily mean return and standard deviation
# Mean log returns add up over time, so the mean is scaled by the number of
# trading days (252); the standard deviation scales with its square root.
trading_days <- 252
daily <- returns %>%
  group_by(NAME) %>%
  summarise(daily.mr = trading_days * mean(RETURNS),
            daily.sd = sqrt(trading_days) * sd(RETURNS))

The Sharpe ratio is a measure of the risk-adjusted performance of an asset. It is the ratio of the annualized mean return to the annualized standard deviation. Please compute the Sharpe ratio for every stock.

# Sharpe ratio per stock: annualized mean return divided by annualized standard deviation
daily[["ratio"]] <- with(daily, daily.mr / daily.sd)

Plot these Sharpe ratios. Which asset has the best performance in terms of Sharpe ratio?

#create a stock index (one running number per stock, whatever their count)
daily$stock <- seq_len(nrow(daily))

#plot the sharpe ratios, labelling every point with its stock name
ggplot(daily, aes(x=stock, y= ratio, label=NAME)) + geom_point(color="blue") +
  geom_text(aes(label=NAME),hjust=-0.5, vjust=0,cex=3) + theme_minimal() 

#report the asset with the highest Sharpe ratio
daily$NAME[which.max(daily$ratio)]


  1. gc2668@columbia.edu