library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(quantmod)
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# set begin-end date and stock namelist
begin_date <- "2013-01-01"
end_date <- "2025-03-31"
stock_namelist <- c("AAPL", # Apple
"MSFT", # Microsoft
"GOOGL", # Alphabet (Google)
"AMZN", # Amazon
"NVDA", # NVIDIA
"TSLA", # Tesla
"JPM", # JPMorgan Chase
"KO", # Coca-Cola
"PEP") # PepsiCo
# prepare stock data
data_set <- xts()
for (stock_index in 1:length(stock_namelist))
data_set <- cbind(data_set, Ad(getSymbols(stock_namelist[stock_index],
from = begin_date, to = end_date, auto.assign = FALSE)))
colnames(data_set) <- stock_namelist
indexClass(data_set) <- "Date"
## Warning: 'indexClass<-' is deprecated.
## Use 'tclass<-' instead.
## See help("Deprecated") and help("xts-deprecated").
X <- diff(log(data_set), na.pad = FALSE)
N <- ncol(X)
T <- nrow(X)
# prepare Fama-French factors
mydata <- read.csv("F-F_Research_Data_Factors_daily.csv", skip = 4)
mydata <- mydata[-nrow(mydata), ] # remove last row
fama_lib <- xts(x = mydata[, c(2,3,4)], order.by = as.Date(paste(mydata[, 1]), "%Y%m%d"))
F_FamaFrench <- fama_lib[index(X)]/100
# prepare index
SP500_index <- Ad(getSymbols("^GSPC", from = begin_date, to = end_date, auto.assign = FALSE))
colnames(SP500_index) <- "index"
f_SP500 <- diff(log(SP500_index), na.pad = FALSE)
# split data into training and set data
T_trn <- round(0.45*T)
X_trn <- X[1:T_trn, ]
X_tst <- X[(T_trn+1):T, ]
F_FamaFrench_trn <- F_FamaFrench[1:T_trn, ]
F_FamaFrench_tst <- F_FamaFrench[(T_trn+1):T, ]
f_SP500_trn <- f_SP500[1:T_trn, ]
f_SP500_tst <- f_SP500[(T_trn+1):T, ]
# sample covariance matrix
Sigma_SCM <- cov(X_trn)
# 1-factor model
F_ <- cbind(ones = 1, f_SP500_trn)
Gamma <- t(solve(t(F_) %*% F_, t(F_) %*% X_trn))
colnames(Gamma) <- c("alpha", "beta")
alpha <- Gamma[, 1]
beta <- Gamma[, 2]
E <- xts(t(t(X_trn) - Gamma %*% t(F_)), index(X_trn))
Psi <- (1/(T_trn-2)) * t(E) %*% E
Sigma_SP500 <- as.numeric(var(f_SP500_trn)) * beta %o% beta + diag(diag(Psi))
# Fama-French 3-factor model
F_ <- cbind(ones = 1, F_FamaFrench_trn)
Gamma <- t(solve(t(F_) %*% F_, t(F_) %*% X_trn))
colnames(Gamma) <- c("alpha", "beta1", "beta2", "beta3")
alpha <- Gamma[, 1]
B <- Gamma[, 2:4]
E <- xts(t(t(X_trn) - Gamma %*% t(F_)), index(X_trn))
Psi <- (1/(T_trn-4)) * t(E) %*% E
Sigma_FamaFrench <- B %*% cov(F_FamaFrench_trn) %*% t(B) + diag(diag(Psi))
# Statistical 1-factor model
K <- 1
alpha <- colMeans(X_trn)
X_trn_ <- X_trn - matrix(alpha, T_trn, N, byrow = TRUE)
Sigma_prev <- matrix(0, N, N)
Sigma <- (1/(T_trn-1)) * t(X_trn_) %*% X_trn_
eigSigma <- eigen(Sigma)
while (norm(Sigma - Sigma_prev, "F")/norm(Sigma, "F") > 1e-3) {
B <- eigSigma$vectors[, 1:K, drop = FALSE] %*% diag(sqrt(eigSigma$values[1:K]), K, K)
Psi <- diag(diag(Sigma - B %*% t(B)))
Sigma_prev <- Sigma
Sigma <- B %*% t(B) + Psi
eigSigma <- eigen(Sigma - Psi)
}
Sigma_PCA1 <- Sigma
# Statistical 3-factor model
K <- 3
alpha <- colMeans(X_trn)
X_trn_ <- X_trn - matrix(alpha, T_trn, N, byrow = TRUE)
Sigma_prev <- matrix(0, N, N)
Sigma <- (1/(T_trn-1)) * t(X_trn_) %*% X_trn_
eigSigma <- eigen(Sigma)
while (norm(Sigma - Sigma_prev, "F")/norm(Sigma, "F") > 1e-3) {
B <- eigSigma$vectors[, 1:K] %*% diag(sqrt(eigSigma$values[1:K]), K, K)
Psi <- diag(diag(Sigma - B %*% t(B)))
Sigma_prev <- Sigma
Sigma <- B %*% t(B) + Psi
eigSigma <- eigen(Sigma - Psi)
}
Sigma_PCA3 <- Sigma
# Statistical 5-factor model
K <- 5
alpha <- colMeans(X_trn)
X_trn_ <- X_trn - matrix(alpha, T_trn, N, byrow = TRUE)
Sigma_prev <- matrix(0, N, N)
Sigma <- (1/(T_trn-1)) * t(X_trn_) %*% X_trn_
eigSigma <- eigen(Sigma)
while (norm(Sigma - Sigma_prev, "F")/norm(Sigma, "F") > 1e-3) {
B <- eigSigma$vectors[, 1:K] %*% diag(sqrt(eigSigma$values[1:K]), K, K)
Psi <- diag(diag(Sigma - B %*% t(B)))
Sigma_prev <- Sigma
Sigma <- B %*% t(B) + Psi
eigSigma <- eigen(Sigma - Psi)
}
Sigma_PCA5 <- Sigma
Sigma_true <- cov(X_tst)
error <- c(SCM = norm(Sigma_SCM - Sigma_true, "F"),
SP500 = norm(Sigma_SP500 - Sigma_true, "F"),
FamaFrench = norm(Sigma_FamaFrench - Sigma_true, "F"),
PCA1 = norm(Sigma_PCA1 - Sigma_true, "F"),
PCA3 = norm(Sigma_PCA3 - Sigma_true, "F"),
PCA5 = norm(Sigma_PCA5 - Sigma_true, "F"))
print(error)
## SCM SP500 FamaFrench PCA1 PCA3 PCA5
## 0.001782066 0.001872833 0.001827501 0.001691325 0.001752055 0.001770201
#> SCM SP500 FamaFrench PCA1 PCA3 PCA5
#> 0.007105459 0.007100631 0.007089249 0.007139017 0.007100200 0.007103374
barplot(error, main = "Error in estimation of covariance matrix",
col = "aquamarine3", cex.names = 0.75, las = 1)

ref <- norm(Sigma_SCM - Sigma_true, "F")^2
PRIAL <- 100*(ref - error^2)/ref
print(PRIAL)
## SCM SP500 FamaFrench PCA1 PCA3 PCA5
## 0.000000 -10.446171 -5.164172 9.924492 3.339673 1.327107
#> SCM SP500 FamaFrench PCA1 PCA3 PCA5
#> 0.00000000 0.13586162 0.45574782 -0.94681006 0.14797930 0.05867364
barplot(PRIAL, main = "PRIAL for estimation of covariance matrix",
col = "bisque3", cex.names = 0.75, las = 1)
