library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(quantmod)
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(xts)
library(quantmod)
begin_date <- "2013-01-01"
end_date <- "2015-12-31"
stock_namelist <- c("AAPL", "AMD", "ADI", "ABBV", "A", "APD", "AA", "CF")
data_set <- xts()
for (symbol in stock_namelist) {
tryCatch({
stock_data <- Ad(getSymbols(symbol, from = begin_date, to = end_date, auto.assign = FALSE))
data_set <- cbind(data_set, stock_data)
}, error = function(e) {
message(paste("Skipping:", symbol, "— not available."))
})
}
colnames(data_set) <- stock_namelist
X <- diff(log(data_set), na.pad = FALSE)
print("✅ First few rows of the log return matrix X:")
## [1] "✅ First few rows of the log return matrix X:"
cat("\n✅ Dimensions of X: ", nrow(X), "rows ×", ncol(X), "columns\n")
##
## ✅ Dimensions of X: 754 rows × 8 columns
library(readr)
library(dplyr)
library(lubridate)
library(xts)
ff_data <- read_csv("F-F_Research_Data_Factors_daily.CSV", skip = 4)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 25962 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ...1
## dbl (4): Mkt-RF, SMB, HML, RF
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ff_data <- ff_data[!is.na(as.numeric(ff_data[[1]])), ]
## Warning in `[.tbl_df`(ff_data, !is.na(as.numeric(ff_data[[1]])), ): NAs
## introduced by coercion
colnames(ff_data)[1:5] <- c("Date", "Mkt_RF", "SMB", "HML", "RF")
ff_data <- ff_data %>%
mutate(
Date = ymd(as.character(Date)),
Mkt_RF = as.numeric(Mkt_RF) / 100,
SMB = as.numeric(SMB) / 100,
HML = as.numeric(HML) / 100,
RF = as.numeric(RF) / 100
)
fama_lib <- xts(ff_data[, c("Mkt_RF", "SMB", "HML", "RF")], order.by = ff_data$Date)
F_FamaFrench <- fama_lib[index(X)]
print("✅ First few rows of Fama-French factors aligned with X:")
## [1] "✅ First few rows of Fama-French factors aligned with X:"
print(head(F_FamaFrench))
## Mkt_RF SMB HML RF
## 2013-01-03 -0.0013 0.0010 0.0005 0
## 2013-01-04 0.0055 0.0012 0.0037 0
## 2013-01-07 -0.0031 -0.0009 -0.0036 0
## 2013-01-08 -0.0026 0.0005 -0.0001 0
## 2013-01-09 0.0035 0.0028 -0.0040 0
## 2013-01-10 0.0067 -0.0065 0.0051 0
SP500_index <- Ad(getSymbols("^GSPC", from = begin_date, to = end_date, auto.assign = FALSE))
colnames(SP500_index) <- "SP500"
f_SP500 <- diff(log(SP500_index), na.pad = FALSE)
f_SP500 <- f_SP500[index(X)]
F_capm <- cbind(1, f_SP500) # [T x 2] matrix
Gamma <- t(solve(t(F_capm) %*% F_capm, t(F_capm) %*% X))
alpha <- Gamma[, 1]
beta <- Gamma[, 2]
E <- X - (F_capm %*% t(Gamma)) # residual matrix
Psi <- (1 / (nrow(X) - 2)) * t(E) %*% E
Sigma_CAPM <- as.numeric(var(f_SP500)) * (beta %o% beta) + diag(diag(Psi))
cat("✅ CAPM covariance matrix (first 5×5 block):\n")
## ✅ CAPM covariance matrix (first 5×5 block):
print(round(Sigma_CAPM[1:5, 1:5], 6))
## AAPL AMD ADI ABBV A
## AAPL 0.000266 0.000073 0.000073 0.000070 0.000072
## AMD 0.000073 0.000976 0.000096 0.000091 0.000095
## ADI 0.000073 0.000096 0.000236 0.000092 0.000095
## ABBV 0.000070 0.000091 0.000092 0.000298 0.000091
## A 0.000072 0.000095 0.000095 0.000091 0.000196
F_ff3 <- cbind(1, F_FamaFrench[, 1:3]) # [1, Mkt_RF, SMB, HML]
Gamma_ff3 <- t(solve(t(F_ff3) %*% F_ff3, t(F_ff3) %*% X))
alpha_ff3 <- Gamma_ff3[, 1]
B <- Gamma_ff3[, 2:4] # [N x 3] matrix of factor loadings
E_ff3 <- X - (F_ff3 %*% t(Gamma_ff3))
Psi_ff3 <- (1 / (nrow(X) - 4)) * t(E_ff3) %*% E_ff3
F_cov <- cov(F_FamaFrench[, 1:3])
Sigma_FF3 <- B %*% F_cov %*% t(B) + diag(diag(Psi_ff3))
cat("✅ Fama-French 3-Factor covariance matrix (first 5×5 block):\n")
## ✅ Fama-French 3-Factor covariance matrix (first 5×5 block):
print(round(Sigma_FF3[1:5, 1:5], 6))
## AAPL AMD ADI ABBV A
## AAPL 0.000267 0.000072 0.000073 0.000074 0.000072
## AMD 0.000072 0.000978 0.000101 0.000094 0.000102
## ADI 0.000073 0.000101 0.000237 0.000094 0.000098
## ABBV 0.000074 0.000094 0.000094 0.000299 0.000094
## A 0.000072 0.000102 0.000098 0.000094 0.000196
Sigma_true <- cov(X)
Sigma_sample <- cov(X)
error <- c(
Sample = norm(Sigma_sample - Sigma_true, "F"),
CAPM = norm(Sigma_CAPM - Sigma_true, "F"),
FF3 = norm(Sigma_FF3 - Sigma_true, "F")
)
barplot(error,
main = "Estimation Error (Frobenius Norm)",
col = "skyblue",
ylab = "Error", ylim = c(0, max(error) * 1.1))

ref <- error["Sample"]^2
PRIAL <- 100 * (ref - error^2) / ref
PRIAL[!is.finite(PRIAL)] <- 0
print(round(PRIAL, 2))
## Sample CAPM FF3
## 0 0 0
barplot(PRIAL,
main = "PRIAL for Factor Models",
col = "lightgreen",
ylab = "% Improvement",
ylim = c(0, max(PRIAL) * 1.1))

estimate_pca_cov <- function(X, K) {
alpha <- colMeans(X)
Xc <- X - matrix(alpha, nrow(X), ncol(X), byrow = TRUE)
Sigma <- cov(Xc)
eig <- eigen(Sigma)
B <- eig$vectors[, 1:K] %*% diag(sqrt(eig$values[1:K]), K, K)
Psi <- diag(diag(Sigma - B %*% t(B)))
B %*% t(B) + Psi
}
Sigma_PCA1 <- estimate_pca_cov(X, 1)
Sigma_PCA3 <- estimate_pca_cov(X, 3)
Sigma_PCA5 <- estimate_pca_cov(X, 5)
Sigma_true <- cov(X)
error <- c(
SCM = norm(Sigma_sample - Sigma_true, "F"),
SP500 = norm(Sigma_CAPM - Sigma_true, "F"),
FamaFrench = norm(Sigma_FF3 - Sigma_true, "F"),
PCA1 = norm(Sigma_PCA1 - Sigma_true, "F"),
PCA3 = norm(Sigma_PCA3 - Sigma_true, "F"),
PCA5 = norm(Sigma_PCA5 - Sigma_true, "F")
)
print(round(error, 6))
## SCM SP500 FamaFrench PCA1 PCA3 PCA5
## 0.000000 0.000124 0.000111 0.000446 0.000224 0.000119
ref <- error["SCM"]^2
PRIAL <- 100 * (ref - error^2) / ref
PRIAL[!is.finite(PRIAL)] <- 0 # Clean Inf/NaN
barplot(error,
main = "Estimation Error (Frobenius Norm)",
col = "skyblue",
ylab = "Error",
cex.names = 0.75, las = 1)

barplot(PRIAL,
main = "PRIAL for estimation of covariance matrix",
col = "bisque3",
ylab = "% Improvement",
cex.names = 0.75, las = 1)

cat("✅ Best method based on PRIAL:", names(which.max(PRIAL)), "\n")
## ✅ Best method based on PRIAL: SCM
print(round(PRIAL, 2))
## SCM SP500 FamaFrench PCA1 PCA3 PCA5
## 0 0 0 0 0 0