companylist <- read.csv("~/Desktop/York Econ/ECON5520/companylist.csv")
df <- companylist[,1:4]
df <- df %>% dplyr::filter(!grepl('ETF', Name)) %>% mutate(Quantile = ntile(MarketCap, n=5))
df.small <- df %>% dplyr::filter(Quantile=='1')
df.mid <- df %>% dplyr::filter(Quantile=='3')
df.large <- df %>% dplyr::filter(Quantile=='5')
set.seed(10)
df1 <- sample_n(df.small, 10)
df2 <- sample_n(df.mid, 10)
set.seed(99)
df3 <- sample_n(df.large, 10)
kable(df1)
| Symbol | Name | LastSale | MarketCap | Quantile |
|---|---|---|---|---|
| SLVO | Credit Suisse AG | 6.7100 | 22487847 | 1 |
| STAF | Staffing 360 Solutions, Inc. | 1.6500 | 13437877 | 1 |
| NAOV | NanoVibronix, Inc. | 4.2400 | 18140988 | 1 |
| FTEK | Fuel Tech, Inc. | 1.3500 | 32630290 | 1 |
| DCAR | DropCar, Inc. | 0.5600 | 5438969 | 1 |
| CTIB | CTI Industries Corporation | 2.8200 | 10464879 | 1 |
| ATAI | ATA Inc. | 1.1400 | 12040866 | 1 |
| KTOV | Kitov Pharma Ltd. | 1.1400 | 11957951 | 1 |
| CLIR | ClearSign Combustion Corporation | 1.0100 | 26964234 | 1 |
| CDMOP | Avid Bioservices, Inc. | 25.8345 | 18084150 | 1 |
kable(df2)
| Symbol | Name | LastSale | MarketCap | Quantile |
|---|---|---|---|---|
| OXLC | Oxford Lane Capital Corp. | 10.06 | 383638372 | 3 |
| LNDC | Landec Corporation | 12.33 | 359669158 | 3 |
| ALCO | Alico, Inc. | 30.00 | 223884090 | 3 |
| ALBO | Albireo Pharma, Inc. | 30.79 | 368554083 | 3 |
| RMTI | Rockwell Medical, Inc. | 4.97 | 283178950 | 3 |
| FRTA | Forterra, Inc. | 4.81 | 308828955 | 3 |
| PRTK | Paratek Pharmaceuticals, Inc. | 6.63 | 214046911 | 3 |
| AVCO | Avalon GloboCare Corp. | 3.57 | 260755481 | 3 |
| WOOD | iShares S&P Global Timber & Forestry Index Fund | 64.09 | 296095800 | 3 |
| PETS | PetMed Express, Inc. | 22.28 | 460614960 | 3 |
kable(df3)
| Symbol | Name | LastSale | MarketCap | Quantile |
|---|---|---|---|---|
| COLM | Columbia Sportswear Company | 103.40 | 7052344990 | 5 |
| SSB | South State Corporation | 70.87 | 2506675727 | 5 |
| AGNC | AGNC Investment Corp. | 17.58 | 9428841237 | 5 |
| CSCO | Cisco Systems, Inc. | 51.30 | 225824000000 | 5 |
| MLNX | Mellanox Technologies, Ltd. | 108.98 | 5909469271 | 5 |
| CHTR | Charter Communications, Inc. | 343.75 | 77465371500 | 5 |
| OKTA | Okta, Inc. | 80.11 | 8847238169 | 5 |
| VRNT | Verint Systems Inc. | 52.34 | 3416318056 | 5 |
| SAVE | Spirit Airlines, Inc. | 56.21 | 3840771797 | 5 |
| IBOC | International Bancshares Corporation | 41.15 | 2700160331 | 5 |
small.tickers <- c("SLVO", "STAF", "NAOV", "FTEK", "DCAR", "CTIB", "ATAI", "KTOV", "CLIR", "CDMOP")
mid.tickers <- c("OXLC", "LNDC", "ALCO", "ALBO", "RMTI", "FRTA", "PRTK", "AVCO", "WOOD", "PETS")
large.tickers <- c("COLM", "SSB", "AGNC", "CSCO", "MLNX", "CHTR", "OKTA", "VRNT", "SAVE", "IBOC")
getSymbols(small.tickers, src= "yahoo", from="2017-12-29", to="2019-01-01")
## [1] "SLVO" "STAF" "NAOV" "FTEK" "DCAR" "CTIB" "ATAI" "KTOV" "CLIR" "CDMOP"
small.returns <- do.call(merge, lapply(small.tickers, function(x) dailyReturn(Ad(get(x)))))
l.small.returns <- do.call(merge, lapply(small.tickers, function(x) log((1+dailyReturn(Ad(get(x)))))))
names(small.returns) <- small.tickers
names(l.small.returns) <- small.tickers
#Mid adj P
getSymbols(mid.tickers, src= "yahoo", from="2017-12-29", to="2019-01-01")
## [1] "OXLC" "LNDC" "ALCO" "ALBO" "RMTI" "FRTA" "PRTK" "AVCO" "WOOD" "PETS"
mid.returns <- do.call(merge, lapply(mid.tickers, function(x) dailyReturn(Ad(get(x)))))
l.mid.returns <- do.call(merge, lapply(mid.tickers, function(x) log((1+dailyReturn(Ad(get(x)))))))
names(mid.returns) <- mid.tickers
names(l.mid.returns) <- mid.tickers
#Large adj P
getSymbols(large.tickers, src= "yahoo", from="2017-12-29", to="2019-01-01")
## [1] "COLM" "SSB" "AGNC" "CSCO" "MLNX" "CHTR" "OKTA" "VRNT" "SAVE" "IBOC"
large.returns <- do.call(merge, lapply(large.tickers, function(x) dailyReturn(Ad(get(x)))))
l.large.returns <- do.call(merge, lapply(large.tickers, function(x) log((1+dailyReturn(Ad(get(x)))))))
names(large.returns) <- large.tickers
names(l.large.returns) <- large.tickers
For completeness, the above section documents how the 30 stocks are chosen from the NASDAQ listings after removing ETFs. Each table contains the 10 stocks sampled from the small, medium, and large market-cap groups (quantiles 1, 3, and 5). The sampling is random, and a seed is set so that it can be replicated. The getSymbols command from the ‘quantmod’ package then pulls 252 adjusted prices from Yahoo! Finance, from the last trading day of December 2017 to the last trading day of December 2018. Finally, daily returns are calculated and compiled into a data frame using the ‘do.call’ command, and ticker names are applied to each data frame.
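As a quick sanity check (a sketch, assuming the objects fetched above, such as SLVO, are still in the workspace), the net returns produced by dailyReturn can be recomputed by hand from the adjusted prices; the log returns are then simply log(1 + R).
p <- as.numeric(Ad(SLVO))                      # adjusted prices for one ticker
manual.net <- p[-1] / p[-length(p)] - 1        # P_t / P_{t-1} - 1
all.equal(manual.net, as.numeric(dailyReturn(Ad(SLVO)))[-1])  # first observation dropped, as in the code below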
small.returns <- small.returns[2:252,]
mid.returns <- mid.returns[2:252,]
large.returns <- large.returns[2:252,]
returns <- cbind(small.returns, mid.returns, large.returns)
l.returns <- cbind(l.small.returns, l.mid.returns, l.large.returns)
l.returns <- l.returns[2:252,]
The above code simply assembles the 251 daily net returns and log returns into return matrices, dropping the degenerate first observation produced by dailyReturn.
getSymbols("SPY", src= "yahoo", from="2017-12-29", to="2019-01-01")
## [1] "SPY"
SPY.adj <- do.call(merge, lapply("SPY", function(x) Ad(get(x))))
SPY.returns <- Delt(SPY.adj)
l.SPY.returns <- log((1+Delt(SPY.adj)))
SPY.returns <- SPY.returns[2:252,]
l.SPY.returns <- l.SPY.returns[2:252,]
The S&P 500 (SPY) serves as the proxy for the market portfolio, and the same process is applied to generate daily returns from adjusted prices over the same time period.
jb.test.5520 <- function (x)
{
if ((NCOL(x) > 1) || is.data.frame(x))
stop("x is not a vector or univariate time series")
if (any(is.na(x)))
stop("NAs in x")
DNAME <- deparse(substitute(x))
n <- length(x)
m1 <- sum(x)/(n-1)
m2 <- sum((x - m1)^2)/(n-1)
m3 <- sum((x - m1)^3)/(n-1)
m4 <- sum((x - m1)^4)/(n-1)
b1 <- (m3/m2^(3/2))^2
b2 <- (m4/m2^2)
STATISTIC <- n * b1/6 + n * (b2 - 3)^2/24
PVAL <- 1 - pchisq(STATISTIC, df = 2)
PARAMETER <- 2
METHOD <- "Jarque Bera Test"
names(STATISTIC) <- "X-squared"
names(PARAMETER) <- "df"
structure(list(statistic = STATISTIC, parameter = PARAMETER,
p.value = PVAL, method = METHOD, data.name = DNAME),
class = "htest")
}
To provide clarity for the above code, the function is adapted from the R package ‘tseries’. In the original implementation each of the moments is divided by \(n\), whereas the formula in the handouts divides by \(n-1\). I have therefore taken the original code and divided the moment estimators by \(n-1\), producing the estimator from the handouts.
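As a quick check (a sketch, assuming the ‘tseries’ package is installed), the modified statistic can be compared with the packaged version on simulated normal data; with roughly 251 observations the choice of divisor makes little difference.
set.seed(1)
z <- rnorm(251)
jb.test.5520(z)$statistic               # modified statistic (n-1 divisor)
tseries::jarque.bera.test(z)$statistic  # original tseries statistic (n divisor)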
output <- lapply(1:30, function(x) jb.test.5520(returns[,x]))
l.output <- lapply(1:30, function(x) jb.test.5520(l.returns[,x]))
jb.test.output <- do.call(rbind, c(output,l.output)) %>% as.data.frame()
l.return.jb.test.table <- jb.test.output[31:60,] %>%
as.data.frame() %>%
select(-parameter, -method, -data.name) %>%
rename("P-value from Log Returns" = p.value,
"Test Statistic from Log Returns" = statistic
)
return.jb.test.table <- jb.test.output[1:30,] %>%
as.data.frame() %>%
select(-parameter, -method, -data.name) %>%
rename("P-value from Net Returns" = p.value,
"Test Statistic from Net Returns" = statistic
)
jb.test.table.all.stocks <- methods::cbind2(return.jb.test.table, l.return.jb.test.table)
returns.tickers <- c(small.tickers, mid.tickers, large.tickers) %>% as.data.frame()
q1.table <- cbind(returns.tickers, jb.test.table.all.stocks) %>%
as.data.frame()
names(q1.table)[names(q1.table) == '.'] <- 'Tickers'
#SPY JB test
spy.output <- jb.test.5520(SPY.returns)
spy.l.output <- jb.test.5520(l.SPY.returns)
SPY.list <- "SPY"
spy.table <- do.call(cbind, c(spy.output, spy.l.output)) %>%
as.data.frame() %>%
cbind(SPY.list)
spy.table <- spy.table[,c(1,3,6,8,11)] %>% as.data.frame()
spy.table <- spy.table %>% rename(Tickers = SPY.list,
"P-value from Net Returns" = p.value,
"Test Statistic from Net Returns" = statistic,
"P-value from Log Returns" = p.value.1,
"Test Statistic from Log Returns" = statistic.1)
Q1.table <- rbind(q1.table, spy.table) %>%
as.data.frame()
Q1.table$`P-value from Net Returns` <- as.numeric(Q1.table$"P-value from Net Returns")
Q1.table$`P-value from Log Returns` <- as.numeric(Q1.table$"P-value from Log Returns")
Q1.table$`Test Statistic from Net Returns` <- as.numeric(Q1.table$`Test Statistic from Net Returns`) %>%
round(2)
Q1.table$`Test Statistic from Log Returns` <- as.numeric(Q1.table$`Test Statistic from Log Returns`) %>%
round(2)
row.names(Q1.table) <- NULL
knitr::kable(Q1.table, caption = "JB Test for Normality for Log and Net Stock Returns ", digits = 3) %>%
landscape()
| Tickers | Test Statistic from Net Returns | P-value from Net Returns | Test Statistic from Log Returns | P-value from Log Returns |
|---|---|---|---|---|
| SLVO | 44.53 | 0.000 | 47.61 | 0.000 |
| STAF | 419548.68 | 0.000 | 117283.34 | 0.000 |
| NAOV | 194.88 | 0.000 | 232.91 | 0.000 |
| FTEK | 6955.90 | 0.000 | 2929.71 | 0.000 |
| DCAR | 585.67 | 0.000 | 219.15 | 0.000 |
| CTIB | 149.78 | 0.000 | 349.74 | 0.000 |
| ATAI | 243261.97 | 0.000 | 71592.76 | 0.000 |
| KTOV | 6972.96 | 0.000 | 2510.12 | 0.000 |
| CLIR | 99.38 | 0.000 | 69.62 | 0.000 |
| CDMOP | 1723.56 | 0.000 | 1694.91 | 0.000 |
| OXLC | 455.85 | 0.000 | 435.76 | 0.000 |
| LNDC | 18.15 | 0.000 | 19.32 | 0.000 |
| ALCO | 47.60 | 0.000 | 42.08 | 0.000 |
| ALBO | 32.75 | 0.000 | 23.04 | 0.000 |
| RMTI | 224.82 | 0.000 | 148.58 | 0.000 |
| FRTA | 83.40 | 0.000 | 146.21 | 0.000 |
| PRTK | 121.40 | 0.000 | 133.01 | 0.000 |
| AVCO | 111825.72 | 0.000 | 11146.39 | 0.000 |
| WOOD | 18.37 | 0.000 | 21.65 | 0.000 |
| PETS | 139.75 | 0.000 | 195.30 | 0.000 |
| COLM | 90.15 | 0.000 | 114.13 | 0.000 |
| SSB | 131.55 | 0.000 | 164.22 | 0.000 |
| AGNC | 17.11 | 0.000 | 19.14 | 0.000 |
| CSCO | 13.52 | 0.001 | 14.43 | 0.001 |
| MLNX | 1002.60 | 0.000 | 718.91 | 0.000 |
| CHTR | 182.53 | 0.000 | 283.95 | 0.000 |
| OKTA | 171.85 | 0.000 | 149.07 | 0.000 |
| VRNT | 1322.67 | 0.000 | 937.98 | 0.000 |
| SAVE | 460.60 | 0.000 | 306.69 | 0.000 |
| IBOC | 50.74 | 0.000 | 60.84 | 0.000 |
| SPY | 114.38 | 0.000 | 113.56 | 0.000 |
The above table displays the results of the Jarque-Bera test for the 30 stocks and the S&P 500, which from here on will be referred to as the market portfolio. The Jarque-Bera test is a goodness-of-fit test which examines whether the skewness and kurtosis of the return data match those of a normal distribution. The test is based on the following property: the normal distribution has an expected skewness of 0 and an expected excess kurtosis of 0 (equivalently, a kurtosis of 3).
Consequently, the null hypothesis is that skewness and excess kurtosis are jointly equal to 0, and the alternative is that at least one of them is not. The JB statistic is asymptotically \(\chi^{2}\)-distributed with 2 degrees of freedom, so the test statistic is compared against the critical value 5.99 at the 5% significance level.
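The critical value quoted above can be recovered directly from the \(\chi^{2}\) quantile function:
qchisq(0.95, df = 2)  # approximately 5.99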
All stocks have p-values close to zero and test statistics far above the critical value, so the null hypothesis of normality is rejected for every stock at all conventional significance levels. The same conclusion is drawn from the log returns, which is unsurprising because, for small returns, the log return is approximately equal to the net return. In the random walk model, however, we assume log returns are normally distributed (equivalently, gross returns are log-normally distributed); this assumption is analyzed in depth in question 4 of this project. Another notable pattern is that the small market cap stocks tend to have much larger test statistics than the medium and large market cap stocks. Intuitively, small cap stocks are more volatile and thus have more returns in the tails of the daily return distribution, so their excess kurtosis is far from 0. Finally, the market portfolio is also not normally distributed.
V = cov(returns)
mu = colMeans(returns)
mu.prime = t(mu)
V.inv = solve(V)
one=rep(1, 30)
one.prime = t(one)
A=mu.prime%*%V.inv%*%one
B=mu.prime%*%V.inv%*%mu
C=one.prime%*%V.inv%*%one
D=B%*%C-A^2
#MVP
min.return = (A/C) %>% as.numeric()
min.sd= sqrt(1/C) %>% as.numeric()
mvp <- cbind(min.return, min.sd)
EZp <- runif(n = 99, min=min.return, max = 0.005) %>% as.list()
st_p = lapply(EZp, function(EZp)((C*EZp^2-2*A*EZp+B)/D)^(1/2))
EZp<- as.numeric(EZp)
st_p <- as.numeric(st_p)
df.2a <- cbind(EZp,st_p) %>% rbind(mvp) %>% data.frame()
ggplot(df.2a) +
geom_smooth(aes(y=EZp, x=st_p)) +
labs(x="Standard Deviation", y= "Expected Return", title="Efficent Frontier") +
geom_point(aes(x=min.sd, y=min.return), colour="Red") +
ylim(-0.001, 0.007)
The efficient frontier is generated through the well-known portfolio formulas. In the first section of the code, I generate the vector \(\mu\), the column means of returns for each stock, and \(V\), the variance-covariance matrix of returns. \(EZ_{p}\) is a vector of 99 arbitrary portfolio returns, with the minimum set to the return of the minimum variance portfolio and the maximum arbitrarily chosen as 0.005. Furthermore, the locus of efficient portfolios is derived using,
\[\begin{align} \sigma_{p}^{2} = \frac{EZ_{p}^{2}C-2~EZ_{p}A+B}{D}. \end{align}\]
identical(min.sd, min(df.2a$st_p))
## [1] TRUE
Here \(EZ_{p}\) is the vector of portfolio returns. Using the ‘lapply’ command, each element of \(EZ_{p}\) is plugged into (1), which outputs the variance of the corresponding portfolio; taking the square root gives the standard deviation, or volatility. Plotting \(EZ_{p}\) against \(\sigma_{p}\) traces out the efficient portfolios, since the minimum value of \(EZ_{p}\) is \(\frac{A}{C}\), the return of the minimum variance portfolio. The frontier shows less curvature than the class examples, most likely because the average returns of the 30 stocks contain many negative values. The MVP is the red point on the graph; as a check, a logical test asks whether the MVP has the lowest standard deviation in the matrix of arbitrary returns and volatilities generated through the equation above. The test returns TRUE: the MVP standard deviation and the minimum standard deviation in the matrix coincide. Moreover, the minimum-variance return is negative, close to 0, and below the risk-free rate of return.
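For completeness, the minimum variance portfolio can also be recovered from its weights, \(w_{mvp} = V^{-1}\mathbf{1}/C\) (a sketch, assuming V, V.inv, one, mu and C from the chunk above are still in scope); its implied mean and standard deviation should equal \(A/C\) and \(\sqrt{1/C}\).
w.mvp <- (V.inv %*% one) / as.numeric(C)    # minimum variance portfolio weights
sum(w.mvp)                                  # weights sum to 1
as.numeric(t(w.mvp) %*% mu)                 # should equal A/C
sqrt(as.numeric(t(w.mvp) %*% V %*% w.mvp))  # should equal sqrt(1/C)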
rf <- 0.02/252
rf.vector <- rep(rf, 100)
df.2b <- cbind(df.2a, rf.vector) %>% as.data.frame() %>% mutate(sharpe.ratio = (EZp-rf)/st_p)
max(df.2b$sharpe.ratio)
## [1] 0.3797931
Including the risk-free rate of return adds a risk trade-off dimension, since portfolios can now combine the risky assets with a risk-free asset. As mentioned previously, the MVP return is below the risk-free rate, so the resulting capital market line lies above the lower portion of the efficient frontier; interestingly, this suggests that a combination of the risk-free asset and the risky portfolio dominates every point on the frontier. The risk-free rate of return is ‘rf’; after generating a 100-by-1 vector of this rate and combining it with \(EZ_{p}\) and \(\sigma_{p}\), the Sharpe ratio is calculated for each portfolio as \(\frac{EZ_{p} - \mu_{f}}{\sigma_{p}}\), where \(\mu_{f}\) is the risk-free rate of return. Finally, because the slope of the capital market line is the maximum Sharpe ratio, the ‘max’ command finds the largest value in the vector of Sharpe ratios.
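To see which grid portfolio attains this maximum Sharpe ratio (the tangency portfolio on this grid of 100 candidate portfolios), the corresponding row can be pulled out directly:
df.2b[which.max(df.2b$sharpe.ratio), c("EZp", "st_p", "sharpe.ratio")]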
ggplot(df.2b) +
geom_smooth(aes(y=EZp, x=st_p)) +
geom_abline(intercept=rf, slope =max(df.2b$sharpe.ratio)) +
labs(x="Standard Deviation", y= "Expected Return", title = "Efficent Frontier and CML") +
geom_point(aes(x=min.sd, y=min.return), colour="Red") +
xlim(0, 0.02) +
ylim(-0.0015, 0.007)
Plotting the line with intercept \(\mu_{f}\) and slope equal to the maximum Sharpe ratio gives the capital market line. The tangency portfolio lies near the upper end of the plotted frontier. In CAPM theory this tangency portfolio is the market portfolio, since every investor would like to hold it.
rf.mat <- matrix(rf, nrow = 251, ncol = 30)
rf.vector <- rep(rf, 251)
ER_i <- returns - rf.mat
ER_m <- SPY.returns - rf.vector
df.all <- cbind(ER_i, ER_m) %>% data.frame()
capm.test <- lm(ER_i ~ ER_m, df.all)
B.a.all <- coefficients(capm.test) %>% t()
B.all <- B.a.all[,2]
To test the capital asset pricing model (CAPM) I utilize two methods: the Wald test and the Fama-MacBeth cross-sectional regression approach. To start, excess returns are calculated for each element of the returns matrix and for the market portfolio; more specifically, this is the Sharpe and Lintner version of the CAPM. The model is estimated by OLS rather than maximum likelihood (ML) or the generalized method of moments (GMM); the point estimates are what matter here, and OLS allows all of the tests in this project to be applied to the returns data. A standard assumption of single-factor models is that the idiosyncratic error, the error term in the regression equation, has an expected value of 0 (justified by small portfolio weights) and no contemporaneous error covariance across stocks, \(cov(\epsilon_{it}, \epsilon_{jt})=0\). Empirically, the last assumption does not hold; consequently, the standard errors are incorrect, but the point estimates should be unaffected.
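For illustration (a sketch, assuming ER_i and ER_m from the chunk above are in scope), the multivariate lm() call above fits this market-model regression for every stock at once; the regression for a single ticker, say CSCO, can be written out explicitly.
csco.fit <- lm(as.numeric(ER_i[, "CSCO"]) ~ as.numeric(ER_m))
coef(csco.fit)   # intercept = alpha hat, slope = beta hat for CSCO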
#Wald Test
a.all <- B.a.all[,1]
res <- resid(capm.test)
sigma <- (1/251)*(t(res)%*%res)
sigma.inv <- solve(sigma)
a.all.prime <- t(a.all)
varER_m <- var(ER_m)
mu.ER_m <- mean(ER_m)
j0 <- 251*(1+(mu.ER_m^2/varER_m))^(-1)*(a.all.prime%*%sigma.inv%*%a.all)
j1 <- ((251-30-1)/(251*30))*j0
j1.all<- j1 %>% as.numeric()
The first test is the Wald test. Formally, the null hypothesis is that the vector of \(\hat{\alpha_{i}}\), where \(i\) indexes the stocks, is equal to 0; the alternative is that it is not, which would provide evidence against the CAPM. \(\hat{\Sigma}\) is the estimated variance-covariance matrix of the OLS residuals, which is used to calculate \(J_{0}\), the Wald test statistic. However, because the sample is finite, \(J_{1}\) is calculated instead, which is an F-test rather than a Wald test. Formally, the formula for the \(J_{1}\) F-test statistic is,
\[\begin{align} J_{1} = \frac{(T-n-1)}{T~n}~\left[ 1 + \frac{\hat{\mu}_m^2}{\hat{\sigma}^2_m} \right]^{-1}\hat{\alpha}' ~\hat{\Sigma}^{-1}~ \hat{\alpha}. \end{align}\]
Moreover, \(J_{1}\) follows an F-distribution with \(n\) degrees of freedom in the numerator and \((T-n-1)\) degrees of freedom in the denominator, where \(n\) equals the number of stocks in the portfolio; here \(n=30\) because this is the F-test for the portfolio of all 30 stocks. Lastly, \(T\) is equal to 251, a finite sample. The reason to use \(J_{1}\) rather than \(J_{0}\) is that \(J_{0}\) is an asymptotic test and rejects the null hypothesis too often in finite samples. The results of each test are saved and compiled into a table below.
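The finite-sample p-value implied by this statistic can be read directly off the F distribution (assuming j1.all from the chunk above is in scope):
pf(j1.all, df1 = 30, df2 = 251 - 30 - 1, lower.tail = FALSE)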
kable(B.all)
| Ticker | Beta |
|---|---|
| SLVO | 0.1753967 |
| STAF | 1.6021898 |
| NAOV | 0.3621772 |
| FTEK | 0.5285670 |
| DCAR | 0.8664930 |
| CTIB | 0.2748523 |
| ATAI | 0.9105274 |
| KTOV | 0.9242521 |
| CLIR | 0.0323883 |
| CDMOP | 0.0358507 |
| OXLC | 0.4229842 |
| LNDC | 0.5930880 |
| ALCO | 0.1987354 |
| ALBO | 1.1597327 |
| RMTI | 0.5336933 |
| FRTA | 0.9905798 |
| PRTK | 1.3011712 |
| AVCO | 0.6507998 |
| WOOD | 0.7422248 |
| PETS | 0.6283831 |
| COLM | 0.7434855 |
| SSB | 0.7391666 |
| AGNC | 0.3036397 |
| CSCO | 1.3058718 |
| MLNX | 0.9496949 |
| CHTR | 0.9328803 |
| OKTA | 1.8785889 |
| VRNT | 1.0708232 |
| SAVE | 0.8806607 |
| IBOC | 0.8878494 |
The \(\hat{\beta}_{1,i}\)’s are displayed above.
#Fama-MacBeth
ER_i.prime <- t(ER_i)
df.b.test <- cbind(ER_i.prime, B.all) %>% data.frame()
dim(df.b.test)
## [1] 30 252
capm.test.2 <- lapply(1:251, function(x)lm(df.b.test[,x] ~ B.all, df.b.test))
B.a.all.2 <- sapply(capm.test.2, coef) %>% as.data.frame()
summaries <- lapply(capm.test.2, summary)
all.stocks.adj.r <- sapply(summaries, function(x) c(adj_r_sq = x$adj.r.squared)) %>% as.numeric()
The second CAPM test is the cross-sectional regression approach, which uses the estimated \(\hat{\beta}\)’s to estimate the parameters \(\gamma_{0}\) and \(\gamma_{1}\). This method involves three steps. First, the vector of \(\hat{\gamma_{0}}\) consists of the estimated intercepts from regressing each day’s excess returns onto the \(\hat{\beta_{1,i}}\) from the initial time-series regression. A formal statistical test is then performed, where the null hypothesis is that the mean of the 251 \(\hat{\gamma_{0}}\) is equal to 0 and the alternative is that it is not. Rejecting the null provides evidence against the CAPM. This test is similar to the Wald test, as the objective is to determine whether the stocks are jointly mis-priced.
Second, the vector of 251 \(\hat{\gamma_{1}}\) is needed in order to test its relationship with \(\hat{\beta_{1,i}}\). Another statistical test is performed, where the null hypothesis is that \(\hat{\gamma_{1}}\) equals the average excess expected return of the market portfolio, and the alternative is that it does not. Mathematically, this can be shown as,
\(\begin{aligned} Z_{i} &= \alpha_{0} + \beta Z_{m} +\epsilon\\ E[Z_{i}] &= \hat{\alpha_{0}} + \hat{\beta}~E[Z_{m}]\\ &= \gamma_{0} + \gamma_{1}~\hat{\beta} +\zeta \\ &= \hat{\gamma_{0}} + \hat{\gamma_{1}}~\hat{\beta} \\ &= \hat{\beta}~E[Z_{m}]. \end{aligned}\)
Intuitively, if the null hypotheses in steps one and two are true, then the expected excess return of stock \(i\) should be a function only of \(\beta_{i}\) and the expected excess return of the market portfolio. In other words, the movements of stock \(i\)’s returns are fully captured by movements in the market portfolio’s returns, providing evidence in support of the CAPM. The last step of this approach is to build the distribution of the 251 adjusted \(R^{2}\) measures, \(\bar{R^2}_{i}\). Ideally, to support the previous claim, the lower bound of \(\bar{R^2}_{i}\) should be reasonably large (say, above 40%), indicating that \(\beta_i\) captures the co-movement of stock \(i\)’s returns with the market portfolio. The interpretation of \(\bar{R^2}_{i}\) is not the same as that of \(R^{2}\), but its magnitude is what matters here. All three tests are performed for the small cap, medium cap, and large cap portfolios and for a portfolio of all 30 stocks. Lastly, a rejection of the CAPM may arise not from the theory but from an improper proxy for the market portfolio; this is an important consideration when drawing conclusions from the tests.
A short description of the code is provided in this paragraph. First, the matrix of excess returns is transposed to a 30x251 dimension in preparation for column-binding the estimated betas. Afterwards, ‘lapply’ is used to loop over each day’s cross-section of returns for all 30 stocks, producing 251 regressions and hence 251 estimates of \(\hat{\gamma_i}\) for \(i=0,1\). Furthermore, 251 \(\bar{R^2}\) values are extracted from the regressions produced by the ‘lapply’ loop.
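To make the loop concrete (a sketch, assuming df.b.test and B.all from the chunk above are in scope), the first cross-sectional regression can be written out by hand: the 30 excess returns on day 1 are regressed on the 30 estimated betas.
day1.fit <- lm(df.b.test[, 1] ~ B.all)
coef(day1.fit)   # gamma 0 hat and gamma 1 hat for the first trading day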
B.all.2 <- B.a.all.2[1,] %>% as.numeric()
a.all.2 <- B.a.all.2[2,]%>% as.numeric()
gamma.0.hat <- mean(a.all.2)
gamma.1.hat <- mean(B.all.2)
From the lecture notes, the estimate of \(\gamma_i\) is nothing but the mean of the vector of 251 estimated coefficients from the looped regressions.
n <- length(a.all.2)
sd.gamma.0 <- sqrt(sum((a.all.2 - mean(a.all.2))^2)/(n^2-n))
sd.gamma.1 <- sqrt(sum((B.all.2 - mean((B.all.2)))^2)/(n^2-n))
Again, taken from the lecture notes, this is the Fama-MacBeth standard error of each estimator: the sample standard deviation of the 251 estimates divided by \(\sqrt{n}\).
t.gamma.0.all = gamma.0.hat/sd.gamma.0
This is the first test statistic for the Fama-MacBeth approach. It follows a t-distribution with \(T-1\) degrees of freedom, where \(T\) is the number of time periods (the number of regressions estimated), giving \(251-1=250\) degrees of freedom; with this many degrees of freedom the distribution is essentially normal, so the critical value is 1.96 at the 5% significance level for all of the size-effect tests. A more in-depth analysis, along with a discussion of the size effect, follows with all test statistics organized into a table.
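The implied two-sided p-value under the normal approximation described above can be computed directly (assuming t.gamma.0.all from the chunk above is in scope):
2 * pnorm(abs(t.gamma.0.all), lower.tail = FALSE)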
avg.excess.return.mkt = mean(ER_m)
t.gamma.1.all = (gamma.1.hat - avg.excess.return.mkt)/sd.gamma.1
Equivalently, the same distribution holds for this test as well, so the critical value is again 1.96 at the 5% significance level.
mean.adj.all <- mean(all.stocks.adj.r)
sd.adj.all <- sqrt(sum((all.stocks.adj.r - mean(all.stocks.adj.r))^2)/(n-1))
upper.CI.all <- mean.adj.all +(1.96*(sd.adj.all)/sqrt(n))
lower.CI.all <- mean.adj.all -(1.96*(sd.adj.all)/sqrt(n))
Lastly, these are the lower and upper bounds of the 95% confidence interval for the mean of the \(\bar{R^2}_{i}\) distribution.
rf.mat <- matrix(rf, nrow = 251, ncol = 10)
rf.vector <- rep(rf, 251)
ER_i <- small.returns - rf.mat
ER_m <- SPY.returns - rf.vector
df.all <- cbind(ER_i, ER_m) %>% data.frame()
capm.test <- lm(ER_i ~ ER_m, df.all)
B.a.all <- coefficients(capm.test) %>% t()
B.all <- B.a.all[,2]
ER_i.prime <- t(ER_i)
df.b.test <- cbind(ER_i.prime, B.all) %>% data.frame()
n <- ncol(df.b.test)-1
capm.test.2 <- lapply(1:n, function(x)lm(df.b.test[,x] ~ B.all, df.b.test))
B.a.all.2 <- sapply(capm.test.2, coef) %>% as.data.frame()
summaries <- lapply(capm.test.2, summary)
all.stocks.adj.r <- sapply(summaries, function(x) c(adj_r_sq = x$adj.r.squared)) %>% as.numeric()
This code and procedure are analogous to the previous case with the portfolio of all stocks. The only important difference is the dimension of the returns matrix, which is now 251x10, so the \(\beta\) and \(\alpha\) vectors have a different length in the following tests. This also changes the value of \(n\) in the F-test: the 95% critical value is now evaluated with 10 degrees of freedom in the numerator and 240 in the denominator.
kable(B.all)
| Ticker | Beta |
|---|---|
| SLVO | 0.1753967 |
| STAF | 1.6021898 |
| NAOV | 0.3621772 |
| FTEK | 0.5285670 |
| DCAR | 0.8664930 |
| CTIB | 0.2748523 |
| ATAI | 0.9105274 |
| KTOV | 0.9242521 |
| CLIR | 0.0323883 |
| CDMOP | 0.0358507 |
#Wald Test
a.all <- B.a.all[,1]
res <- resid(capm.test)
sigma <- (1/251)*(t(res)%*%res)
sigma.inv <- solve(sigma)
a.all.prime <- t(a.all)
varER_m <- var(ER_m)
mu.ER_m <- mean(ER_m)
j0 <- 251*(1+(mu.ER_m^2/varER_m))^(-1)*(a.all.prime%*%sigma.inv%*%a.all)
j1 <- ((251-10-1)/(251*10))*j0
j1.small <- j1 %>% as.numeric()
#1
B.all.2 <- B.a.all.2[1,] %>% as.numeric()
a.all.2 <- B.a.all.2[2,]%>% as.numeric()
#2
gamma.0.hat <- mean(a.all.2)
gamma.1.hat <- mean(B.all.2)
#3
n <- length(a.all.2)
sd.gamma.0 <- sqrt(sum((a.all.2 - mean(a.all.2))^2)/(n^2-n))
sd.gamma.1 <- sqrt(sum((B.all.2 - mean((B.all.2)))^2)/(n^2-n))
t.gamma.0.small = gamma.0.hat/sd.gamma.0
avg.excess.return.mkt = mean(ER_m)
t.gamma.1.small = (gamma.1.hat - avg.excess.return.mkt)/sd.gamma.1
mean.adj.all <- mean(all.stocks.adj.r)
sd.adj.all <- sqrt(sum((all.stocks.adj.r - mean(all.stocks.adj.r))^2)/(n-1))
upper.CI.small <- mean.adj.all +(1.96*(sd.adj.all)/sqrt(n))
lower.CI.small <- mean.adj.all -(1.96*(sd.adj.all)/sqrt(n))
Again, all the steps are identical to the all-stock case. The procedure for the medium cap and large cap portfolios is also identical, so I have chosen not to include it, apart from the \(\beta\) estimates.
kable(B.all)
| Ticker | Beta |
|---|---|
| OXLC | 0.4229842 |
| LNDC | 0.5930880 |
| ALCO | 0.1987354 |
| ALBO | 1.1597327 |
| RMTI | 0.5336933 |
| FRTA | 0.9905798 |
| PRTK | 1.3011712 |
| AVCO | 0.6507998 |
| WOOD | 0.7422248 |
| PETS | 0.6283831 |
kable(B.all)
| Ticker | Beta |
|---|---|
| COLM | 0.7434855 |
| SSB | 0.7391666 |
| AGNC | 0.3036397 |
| CSCO | 1.3058718 |
| MLNX | 0.9496949 |
| CHTR | 0.9328803 |
| OKTA | 1.8785889 |
| VRNT | 1.0708232 |
| SAVE | 0.8806607 |
| IBOC | 0.8878494 |
small.capm.test <- c(j1.small, qf(.95,df1=10, df2=(251-10-1)), t.gamma.0.small, t.gamma.1.small, lower.CI.small, upper.CI.small) %>% t()
mid.capm.test <- c(j1.mid, qf(.95,df1=10, df2=(251-10-1)), t.gamma.0.mid, t.gamma.1.mid, lower.CI.mid, upper.CI.mid) %>% t()
large.capm.test <- c(j1.large, qf(.95,df1=10, df2=(251-10-1)) , t.gamma.0.large, t.gamma.1.large, lower.CI.large, upper.CI.large) %>% t()
all.capm.test <- c(j1.all, qf(.95,df1=30, df2=(251-30-1)), t.gamma.0.all, t.gamma.1.all, lower.CI.all, upper.CI.all) %>% t()
capm.test.table <- rbind(small.capm.test, mid.capm.test, large.capm.test, all.capm.test)
row.names(capm.test.table) <- c("Small Cap", "Mid Cap", "Large Cap", "All Stocks")
colnames(capm.test.table) <- c("J1 F-Test Statistic","F-Distribution Critical Value", "Gamma 0 Test Statistic", "Gamma 1 Test Statistic", "Lower C.I", "Upper C.I")
knitr::kable(capm.test.table, caption = "CAPM Test Results for Size Effects", digits = 3) %>% landscape()
| | J1 F-Test Statistic | F-Distribution Critical Value | Gamma 0 Test Statistic | Gamma 1 Test Statistic | Lower C.I | Upper C.I |
|---|---|---|---|---|---|---|
| Small Cap | 1.131 | 1.870 | 0.434 | -0.798 | 0.006 | 0.047 |
| Mid Cap | 1.104 | 1.870 | -1.680 | 1.077 | -0.028 | 0.012 |
| Large Cap | 1.287 | 1.870 | 1.974 | -1.797 | 0.075 | 0.134 |
| All Stocks | 1.417 | 1.511 | 0.780 | -0.964 | 0.007 | 0.023 |
In the analytical portion of this question, I aim to determine whether there is a size effect when testing the Sharpe and Lintner CAPM. The first test is the F-test. Interestingly, the F-test statistic for every size portfolio is less than its critical value, so we fail to reject the null hypothesis that the vector of \(\hat{\alpha}\) for each portfolio is equal to 0, which provides support for the Sharpe and Lintner CAPM. The F-test statistic rises for the large cap portfolio and rises further for the portfolio of all stocks, but both remain below their respective critical values.
The second test of the Sharpe and Lintner CAPM is the Fama-MacBeth method, and there is a noticeable pattern across size groups. For the small cap stocks, the \(\hat{\gamma}_{0}\) test statistic is less than 1.96, so we fail to reject the null hypothesis at the 5% significance level that \(\hat{\gamma}_{0}\) equals 0. Furthermore, the \(\hat{\gamma}_{1}\) test statistic is also smaller than 1.96 in absolute value, so we fail to reject the null that \(\hat{\gamma}_{1}\) equals the average excess market return. Both tests provide support for the CAPM, which is surprising because the CAPM is thought to fail for small cap stocks. Another interesting result is that the \(\hat{\gamma}_{0}\) test statistic for the medium cap stocks is larger in absolute value, again the opposite of what is expected, although it remains statistically insignificant. The \(\hat{\gamma}_{1}\) test statistic for the medium cap portfolio is likewise statistically insignificant at the 5% level, again providing supporting evidence for the CAPM for medium market cap stocks.
The most interesting result is that the \(\hat{\gamma}_{0}\) test statistic for the large cap portfolio is marginally larger than 1.96, which suggests that at the 5% significance level \(\hat{\gamma}_{0}\) is statistically different from 0, providing enough evidence to reject the CAPM for large cap stocks. This is counter-intuitive to my hypothesis, as theory and empirical work suggest the CAPM fails in the opposite direction. Furthermore, the \(\hat{\gamma}_{1}\) test statistic is larger in absolute value than for the mid cap portfolio, but still statistically insignificant. This conclusion is consistent, to some degree, with the \(J_{1}\) F-test, whose statistic is highest for the large cap portfolio. Lastly, a portfolio that includes all 30 stocks is also considered: its F-test statistic remains below the critical value, although much closer to it than for the individual size portfolios, so the CAPM holds for the portfolio of all stocks as well. As noted previously, rejecting the CAPM may have less to do with the theory than with the proxy for the market portfolio; another conclusion that could be drawn is that the S&P 500 does not accurately proxy the market portfolio.
Lastly, the lower bound of the 95% confidence interval of \(\bar{R^2}\) for the small cap stocks is 0.006. While \(\bar{R^2}\) does not have as clean an interpretation as \(R^{2}\), its magnitude still indicates how well the model fits, and such a low value suggests that the excess market return is a poor sole factor in this model and that additional factors are needed. This is unsurprising given the better goodness-of-fit achieved by multi-factor models. Interestingly, the large cap stocks have the highest lower bound for \(\bar{R^2}\), at 0.075.
l.returns.w.spy <- cbind(l.returns, l.SPY.returns)
rev.l.returns <- apply(l.returns.w.spy, 2, rev)
lag.l.returns <- lapply(1:31, function(x)Lag(rev.l.returns[,x], k=0:10))
df.4.1 = do.call(cbind, lag.l.returns)
df.4 <- df.4.1[11:251,]
n=nrow(df.4)
dfm = as.matrix(df.4)
The last test is related to market efficiency. A market is efficient if asset prices reflect all available information, so past prices cannot be used to predict future returns; the random walk model is a theoretical framework derived from this idea. Mathematically, it can be written as,
\[\begin{align} log(p_{t}) = \mu + log(p_{t-1}) + \epsilon_{t}. \end{align}\]
Equation (3) states that the log asset price at time \(t\) is a function of the log asset price at \(t-1\), a drift term \(\mu\), and an error \(\epsilon_{t}\). Rearranging equation (3),
\(\begin{aligned} log(p_{t}) - log(p_{t-1}) &= \mu + \epsilon_{t} \\ log(p_{t}/p_{t-1}) &= \mu + \epsilon_{t} \\ log(1+R) &= \mu + \epsilon_{t} \\ E[log(1+R)] &= \mu. \end{aligned}\)
Intuitively, this states that the expected log return can be estimated by simply taking the average of the log returns. The strictest version of the model assumes the errors are normally distributed with mean 0 and constant variance, which makes prices log-normally distributed; working in log prices also respects limited liability, since prices cannot become negative. The normality assumption is, however, ambitious. The random walk hypothesis is broken up into three models, each with progressively weaker assumptions, and the focus in this question is the third, random walk 3 (RW3). Most importantly, RW3 makes no assumption about the distribution of the errors and does not require them to be independent or identically distributed, only uncorrelated; it therefore serves as a benchmark for testing market efficiency. If the random walk holds, returns depend only on their mean and past returns have no predictive content; if not, returns at time \(t\) depend on past returns as well as the mean.
To create the lagged matrix, I use the ‘Lag’ function in R to generate \(k=0,...,10\) lags of each asset, including the market portfolio, and an ‘lapply’ loop to combine all the lagged matrices efficiently. The next step highlights the procedure needed to test RW3.
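For reference, a toy example of what ‘Lag’ does when given a vector of lag orders (illustrative values only): each lag appears as a column, with leading NAs for the shifted observations.
quantmod::Lag(c(1, 2, 3, 4, 5), k = 0:2)   # columns Lag.0, Lag.1, Lag.2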
dfm1 <- dfm[,1:11] %>% as.data.frame()
rw3.1 <- lm(dfm1[,1] ~ . -Lag.0, data=dfm1)
rho.a.all <- coefficients(rw3.1) %>% t()
rho.all <- rho.a.all[,2:11] %>% as.data.frame()
First, to test the random walk one must estimate the Pearson autocorrelation coefficient, which is defined as,
\[\begin{align} \rho(k) = \frac{cov(r_{t},r_{t-k})}{var(r_{t})^{1/2}var(r_{t+k})^{1/2}}. \end{align}\]
Assuming stationarity, \({var(r_{t})}^{1/2}var(r_{t+k})^{1/2} = var(r_{t})^{1/2}var(r_{t})^{1/2} = var(r_{t})\). Therefore, equation (4) simplifies as such,
\(\begin{aligned} \rho(k) &= \frac{cov(r_{t},r_{t-k})}{var(r_{t})^{1/2}var(r_{t+k})^{1/2}} \\ &= \frac{cov(r_{t},r_{t-k})}{var(r_{t})} \\ &= \beta. \end{aligned}\)
Assuming stationarity, the autocorrelation coefficients are nothing but the slope estimates of a linear regression: the return of each stock at lag 0 is regressed onto its 10 lags, so 31 regressions are estimated from the lagged data matrix. Moreover, because \(T=241\), there is no need to use the bias-corrected estimator of the autocorrelation coefficient.
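As a rough cross-check (a sketch, assuming dfm1 and rho.all from the chunk above are in scope), the regression-based estimates can be compared with the usual sample autocorrelations; small differences are expected because the regression fits all ten lags jointly.
acf(dfm1[, 1], lag.max = 10, plot = FALSE)$acf[2:11]   # sample autocorrelations, lags 1 to 10
rho.all                                                # regression-based estimates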
q <- c(2:11)
k <- c(1:10)
vrq<-function(q,k)
{
1+2*(sum(1-(k/q))*rho.all[k,])
}
VRq <- mapply(vrq,q,k)
The RW3 test starts by estimating the 10 variance-ratio statistics proposed by Lo and MacKinlay, one for each of the 10 lags estimated from the daily returns. This test allows for general forms of heteroskedasticity and does not require returns to be normally distributed, thereby relaxing the assumptions of the random walk test. Mathematically, the above code computes,
\[\begin{align} \bar{VR(q)} = 1 + \sum_{k=1}^{q-1}2~(1-\frac{k}{q})~\rho(k), \end{align}\]
where \(q=2,...,11\) and \(k=1,...,10\). Ten variance-ratio statistics \(VR(q)\) are estimated for SLVO, and the same number is estimated for every other stock. Furthermore, \(VR(q)\) is asymptotically normally distributed with mean 1 and variance \(\frac{\hat{\theta}(q)}{nq}\), where \(nq=T=241\) for all stocks.
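As an intuition check for the first entry (a sketch, assuming dfm1 from the chunk above is in scope), the \(q=2\) variance ratio can also be computed directly as the variance of non-overlapping two-day returns divided by twice the one-day variance; it should be in the same neighbourhood as the first element of VRq.
r <- dfm1[, 1]
r2 <- r[seq(1, length(r) - 1, by = 2)] + r[seq(2, length(r), by = 2)]   # non-overlapping two-day returns
var(r2) / (2 * var(r))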
r.bar <- mean(dfm1[,1])
r.bar.vec <- rep(r.bar, 241)
delta.hat.1 <- 241*sum(((dfm1[2:241,1]-r.bar.vec[2:241])^2)*
((dfm1[1:240,1]-r.bar.vec[1:240])^2))/
(sum(((dfm1[2:241,1]-r.bar.vec[2:241])^2)^2))
delta.hat.2 <- 241*sum(((dfm1[3:241,1]-r.bar.vec[3:241])^2)*((dfm1[1:239,1]-r.bar.vec[1:239])^2))/(sum(((dfm1[3:241,1]-r.bar.vec[3:241])^2)^2))
delta.hat.3 <- 241*sum(((dfm1[4:241,1]-r.bar.vec[4:241])^2)*((dfm1[1:238,1]-r.bar.vec[1:238])^2))/(sum(((dfm1[4:241,1]-r.bar.vec[4:241])^2)^2))
delta.hat.4 <- 241*sum(((dfm1[5:241,1]-r.bar.vec[5:241])^2)*((dfm1[1:237,1]-r.bar.vec[1:237])^2))/(sum(((dfm1[5:241,1]-r.bar.vec[5:241])^2)^2))
delta.hat.5 <- 241*sum(((dfm1[6:241,1]-r.bar.vec[6:241])^2)*((dfm1[1:236,1]-r.bar.vec[1:236])^2))/(sum(((dfm1[6:241,1]-r.bar.vec[6:241])^2)^2))
delta.hat.6 <- 241*sum(((dfm1[7:241,1]-r.bar.vec[7:241])^2)*((dfm1[1:235,1]-r.bar.vec[1:235])^2))/(sum(((dfm1[7:241,1]-r.bar.vec[7:241])^2)^2))
delta.hat.7 <- 241*sum(((dfm1[8:241,1]-r.bar.vec[8:241])^2)*((dfm1[1:234,1]-r.bar.vec[1:234])^2))/(sum(((dfm1[8:241,1]-r.bar.vec[8:241])^2)^2))
delta.hat.8 <- 241*sum(((dfm1[9:241,1]-r.bar.vec[9:241])^2)*((dfm1[1:233,1]-r.bar.vec[1:233])^2))/(sum(((dfm1[9:241,1]-r.bar.vec[9:241])^2)^2))
delta.hat.9 <- 241*sum(((dfm1[10:241,1]-r.bar.vec[10:241])^2)*((dfm1[1:232,1]-r.bar.vec[1:232])^2))/(sum(((dfm1[10:241,1]-r.bar.vec[10:241])^2)^2))
delta.hat.10 <- 241*sum(((dfm1[11:241,1]-r.bar.vec[11:241])^2)*((dfm1[1:231,1]-r.bar.vec[1:231])^2))/(sum(((dfm1[11:241,1]-r.bar.vec[11:241])^2)^2))
delta.hat <- c(delta.hat.1, delta.hat.2, delta.hat.3, delta.hat.4, delta.hat.5, delta.hat.6, delta.hat.7, delta.hat.8, delta.hat.9, delta.hat.10) %>%
as.vector()
Second, I find \(\hat{\delta}(k)\). This step is written out manually rather than with an ‘mapply’ loop (which allows a loop over multiple inputs) because, unfortunately, the loop produced ‘NA’ values; the manual calculation gives the correct estimates. This produces 10 \(\hat{\delta}(k)\) estimates for each stock, following
\[\begin{align} \hat{\delta}(k) = \frac{nq \sum_{j=k+1}^{nq} (r_{j} - \bar{r})^{2}~(r_{j-k}-\bar{r})^{2}}{\left[\sum_{j=1}^{nq}(r_{j} - \bar{r})^{2}\right]^{2}}. \end{align}\]
q <- c(2:11)
k <- c(1:10)
theta <- function(q,k)
{
sum((2*(1-(k/q)))^2)*delta.hat[k]
}
theta.hat <- mapply(theta,q,k)
Third, \(\hat\theta(q)\) is estimated through a function similar to the \(\bar{VR(q)}\) calculation, both in formula and in coding method, using an ‘mapply’ loop. Finally, the test statistic may be calculated.
\[\begin{align} \hat \theta(q)= \sum_{k=1}^{q-1}~[2~(1 - \frac{k}{q})]^2 ~\hat{\delta}(k). \end{align}\]
psi.star <- lapply(1:10, function(k)((241)^(1/2)*VRq[k]-1)/(sqrt(theta.hat[k]))) %>%
unlist()
psi.star.1 <- psi.star
Using equations (5), (6) and (7), \(\psi^{*}(q)\) is calculated for each of the 10 lags for each stock, as defined in equation (8) below. Furthermore, each vector of \(\psi^{*}(q)\) is stored in a separate object in order to construct a table for a neater presentation.
\[\begin{align} \psi^{*}(q) = \frac{\sqrt{nq}~(\bar{VR(q)} - 1)}{\sqrt{\hat \theta(q)}}. \end{align}\]
dfm1 <- dfm[,12:22] %>% as.data.frame()
rw3.1 <- lm(dfm1[,1] ~ . -Lag.0, data=dfm1)
rho.a.all <- coefficients(rw3.1) %>% t()
rho.all <- rho.a.all[,2:11] %>% as.data.frame()
q <- c(2:11)
k <- c(1:10)
vrq<-function(q,k)
{
1+2*(sum(1-(k/q))*rho.all[k,])
}
VRq <- mapply(vrq,q,k)
r.bar <- mean(dfm1[,1])
r.bar.vec <- rep(r.bar, 241)
delta.hat.1 <- 241*sum(((dfm1[2:241,1]-r.bar.vec[2:241])^2)*((dfm1[1:240,1]-r.bar.vec[1:240])^2))/(sum(((dfm1[2:241,1]-r.bar.vec[2:241])^2)^2))
delta.hat.2 <- 241*sum(((dfm1[3:241,1]-r.bar.vec[3:241])^2)*((dfm1[1:239,1]-r.bar.vec[1:239])^2))/(sum(((dfm1[3:241,1]-r.bar.vec[3:241])^2)^2))
delta.hat.3 <- 241*sum(((dfm1[4:241,1]-r.bar.vec[4:241])^2)*((dfm1[1:238,1]-r.bar.vec[1:238])^2))/(sum(((dfm1[4:241,1]-r.bar.vec[4:241])^2)^2))
delta.hat.4 <- 241*sum(((dfm1[5:241,1]-r.bar.vec[5:241])^2)*((dfm1[1:237,1]-r.bar.vec[1:237])^2))/(sum(((dfm1[5:241,1]-r.bar.vec[5:241])^2)^2))
delta.hat.5 <- 241*sum(((dfm1[6:241,1]-r.bar.vec[6:241])^2)*((dfm1[1:236,1]-r.bar.vec[1:236])^2))/(sum(((dfm1[6:241,1]-r.bar.vec[6:241])^2)^2))
delta.hat.6 <- 241*sum(((dfm1[7:241,1]-r.bar.vec[7:241])^2)*((dfm1[1:235,1]-r.bar.vec[1:235])^2))/(sum(((dfm1[7:241,1]-r.bar.vec[7:241])^2)^2))
delta.hat.7 <- 241*sum(((dfm1[8:241,1]-r.bar.vec[8:241])^2)*((dfm1[1:234,1]-r.bar.vec[1:234])^2))/(sum(((dfm1[8:241,1]-r.bar.vec[8:241])^2)^2))
delta.hat.8 <- 241*sum(((dfm1[9:241,1]-r.bar.vec[9:241])^2)*((dfm1[1:233,1]-r.bar.vec[1:233])^2))/(sum(((dfm1[9:241,1]-r.bar.vec[9:241])^2)^2))
delta.hat.9 <- 241*sum(((dfm1[10:241,1]-r.bar.vec[10:241])^2)*((dfm1[1:232,1]-r.bar.vec[1:232])^2))/(sum(((dfm1[10:241,1]-r.bar.vec[10:241])^2)^2))
delta.hat.10 <- 241*sum(((dfm1[11:241,1]-r.bar.vec[11:241])^2)*((dfm1[1:231,1]-r.bar.vec[1:231])^2))/(sum(((dfm1[11:241,1]-r.bar.vec[11:241])^2)^2))
delta.hat <- c(delta.hat.1, delta.hat.2, delta.hat.3, delta.hat.4, delta.hat.5, delta.hat.6, delta.hat.7, delta.hat.8, delta.hat.9, delta.hat.10) %>%
as.vector()
q <- c(2:11)
k <- c(1:10)
theta <- function(q,k)
{
sum((2*(1-(k/q)))^2)*delta.hat[k]
}
theta.hat <- mapply(theta,q,k)
psi.star <- lapply(1:10, function(k)((241)^(1/2)*VRq[k]-1)/(sqrt(theta.hat[k]))) %>%
unlist()
psi.star.2 <- psi.star
The above code and process are exactly the same as for SLVO; for completeness, I have included this code chunk to show how it looks for the remaining 29 stocks and the market portfolio. The only difference is the selection of columns from the data matrix, which contains the lagged series for all 31 assets.
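To avoid copying this block for every asset, the per-asset steps can also be wrapped in a single function. This is a refactoring sketch that mirrors the manual calculations above (assuming dfm is in scope); it is not the code actually used to produce the table below.
rw3.psi <- function(first.col) {
  # Select the 11-column lag block (Lag.0, ..., Lag.10) for one asset.
  d   <- as.data.frame(dfm[, first.col:(first.col + 10)])
  # Regression-based autocorrelation estimates, as in rw3.1 above.
  rho <- coef(lm(d[, 1] ~ . - Lag.0, data = d))[2:11]
  q <- 2:11; k <- 1:10
  VRq <- mapply(function(q, k) 1 + 2 * (sum(1 - (k / q)) * rho[k]), q, k)
  # delta hat, reproducing the manual block above in one sapply call.
  r <- d[, 1]; d2 <- (r - mean(r))^2; nq <- length(r)
  delta <- sapply(k, function(k)
    nq * sum(d2[(k + 1):nq] * d2[1:(nq - k)]) / sum(d2[(k + 1):nq]^2))
  theta <- mapply(function(q, k) sum((2 * (1 - (k / q)))^2) * delta[k], q, k)
  # psi*, mirroring the expression used for psi.star above.
  (sqrt(nq) * VRq - 1) / sqrt(theta)
}
# e.g. rw3.psi(1) should reproduce psi.star.1 and rw3.psi(12) should reproduce psi.star.2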
rw3.table <- cbind(psi.star.1, psi.star.2, psi.star.3, psi.star.4, psi.star.5, psi.star.6, psi.star.7, psi.star.8, psi.star.9, psi.star.10, psi.star.11, psi.star.12, psi.star.13, psi.star.14, psi.star.15, psi.star.16, psi.star.17, psi.star.18, psi.star.19, psi.star.20, psi.star.21, psi.star.22, psi.star.23, psi.star.24, psi.star.25, psi.star.26, psi.star.27, psi.star.28, psi.star.29, psi.star.30, psi.star.31) %>% t() %>% as.data.frame()
colnames(rw3.table) <- seq(1:10)
rownames(rw3.table) <- c(small.tickers, mid.tickers, large.tickers, "SPY")
knitr::kable(rw3.table, digits = 3, caption = "Random Walk 3 Test Statistics of 10 Lags for Daily Returns of 30 Stocks and the Market Portfolio")
| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
|---|---|---|---|---|---|---|---|---|---|---|
| SLVO | 1.098 | 2.876 | 4.541 | 5.544 | 6.012 | 6.958 | 6.895 | 9.407 | 12.677 | 13.243 |
| STAF | 1.932 | 6.685 | 22.567 | 21.218 | 51.106 | 26.789 | 20.777 | 74.341 | 91.134 | 70.504 |
| NAOV | 1.128 | 3.333 | 4.342 | 4.235 | 6.433 | 5.225 | 7.649 | 6.842 | 8.639 | 9.493 |
| FTEK | 4.440 | 2.737 | 7.232 | 5.905 | 16.953 | 15.581 | 20.517 | 16.991 | 31.444 | 27.786 |
| DCAR | 1.312 | 2.595 | 5.060 | 5.414 | 8.543 | 7.202 | 12.103 | 12.447 | 16.570 | 13.234 |
| CTIB | 1.960 | 3.488 | 5.498 | 7.395 | 8.950 | 10.024 | 12.239 | 11.473 | 15.068 | 14.445 |
| ATAI | 4.335 | 14.066 | 13.500 | 55.337 | 50.427 | 29.805 | 36.444 | 36.485 | 23.582 | 88.160 |
| KTOV | 1.934 | 6.837 | 11.687 | 6.067 | 12.949 | 7.267 | 16.583 | 25.506 | 35.746 | 20.795 |
| CLIR | 1.249 | 2.402 | 3.205 | 4.464 | 5.600 | 6.033 | 6.767 | 8.172 | 7.332 | 8.386 |
| CDMOP | 0.411 | 2.568 | 7.113 | 8.348 | 6.961 | 7.533 | 10.804 | 17.454 | 24.902 | 23.478 |
| OXLC | 1.322 | 2.434 | 3.599 | 5.315 | 7.892 | 8.300 | 10.220 | 13.305 | 13.791 | 18.199 |
| LNDC | 2.088 | 2.441 | 3.912 | 4.754 | 6.647 | 6.800 | 7.765 | 9.135 | 10.441 | 11.244 |
| ALCO | 1.364 | 2.903 | 4.085 | 4.088 | 5.645 | 7.893 | 9.398 | 9.619 | 9.132 | 10.496 |
| ALBO | 1.502 | 2.539 | 3.964 | 4.775 | 5.702 | 5.910 | 6.739 | 9.925 | 10.525 | 10.263 |
| RMTI | 1.311 | 2.711 | 4.146 | 6.926 | 6.828 | 8.496 | 9.579 | 13.527 | 15.206 | 17.487 |
| FRTA | 1.986 | 3.709 | 5.114 | 6.090 | 7.143 | 9.191 | 9.388 | 15.490 | 13.032 | 17.146 |
| PRTK | 2.323 | 2.515 | 4.528 | 4.472 | 7.223 | 7.789 | 12.313 | 10.608 | 13.748 | 13.704 |
| AVCO | 1.740 | 3.073 | 6.264 | 19.984 | 13.613 | 16.837 | 21.540 | 9.283 | 16.885 | 27.677 |
| WOOD | 1.552 | 2.476 | 3.258 | 4.005 | 5.244 | 6.329 | 6.832 | 8.752 | 9.856 | 10.089 |
| PETS | 1.818 | 3.344 | 4.483 | 6.007 | 7.480 | 7.637 | 9.650 | 12.721 | 14.288 | 8.987 |
| COLM | 1.493 | 2.894 | 4.532 | 5.283 | 7.209 | 8.056 | 9.294 | 12.697 | 11.254 | 13.426 |
| SSB | 1.731 | 3.732 | 3.705 | 5.374 | 8.072 | 8.089 | 10.539 | 10.141 | 10.326 | 16.054 |
| AGNC | 1.344 | 2.076 | 3.325 | 4.070 | 5.380 | 5.636 | 7.490 | 8.066 | 8.201 | 9.569 |
| CSCO | 1.346 | 2.375 | 2.953 | 4.205 | 5.108 | 5.840 | 7.715 | 6.853 | 8.901 | 9.720 |
| MLNX | 1.338 | 2.635 | 5.871 | 7.930 | 9.325 | 12.387 | 17.691 | 12.535 | 13.140 | 19.500 |
| CHTR | 2.104 | 3.319 | 5.422 | 7.314 | 8.429 | 11.046 | 10.744 | 15.139 | 15.152 | 17.926 |
| OKTA | 2.623 | 2.758 | 4.950 | 5.928 | 5.948 | 7.044 | 10.877 | 7.820 | 14.878 | 11.014 |
| VRNT | 1.353 | 4.105 | 7.405 | 7.905 | 9.608 | 14.595 | 13.134 | 20.303 | 23.663 | 22.081 |
| SAVE | 1.926 | 4.049 | 5.944 | 6.710 | 8.481 | 8.503 | 7.237 | 12.107 | 16.799 | 17.131 |
| IBOC | 1.634 | 3.099 | 3.727 | 5.120 | 6.312 | 8.120 | 8.781 | 8.367 | 8.310 | 10.778 |
| SPY | 1.593 | 2.414 | 3.261 | 3.899 | 5.995 | 6.321 | 9.178 | 8.694 | 9.366 | 8.924 |
For the analysis section, I have included a table of the test statistics for all 31 assets: the 30 stocks and the market portfolio proxy. The distribution of \(\psi^{*}(q)\) is normal with mean 0 and variance 1, so the critical value is 1.96 at the 5% significance level; a test statistic far from 0 implies a variance ratio that is unlikely to equal 1, thus rejecting the random walk and the efficient market hypothesis. At lags greater than 1, every test statistic exceeds the critical value, so the null hypothesis that the autocorrelation coefficients at lags \(k=2,...,10\) are jointly equal to 0 is rejected for all assets. At the first lag, however, 23 of the 31 assets fail to reject the null that the first autocorrelation coefficient is 0, so the relationship \(E[r_{t}]=\mu\), where \(r_{t}\) is the log return of each stock, is not contradicted for these at the shortest horizon; the efficient market hypothesis is rejected even at the first lag for the remaining 8 stocks. Only 2 large cap stocks reject market efficiency at the first lag at the 5% level, compared with 3 stocks each among the small and medium cap groups. Small cap stocks also have, on average, very large test statistics at longer lags compared to large cap stocks, which is consistent with the theory that the random walk fails more often for smaller cap stocks due to the more volatile nature of their returns.