# Load required libraries
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(quantmod)
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Set begin-end date and stock namelist
begin_date <- "2016-01-01"
end_date <- "2017-12-31"
stock_namelist <- c("AAPL", "AMD", "ADI", "ABBV", "AEZS", "A", "APD", "AA", "CF")
sector_namelist <- c(rep("Information Technology", 3), rep("Health Care", 3), rep("Materials", 3))
# Initialize empty xts object for data storage
data_set <- xts()
# Download data from Yahoo Finance with error handling
cat("Downloading stock data...\n")
## Downloading stock data...
for (stock_index in 1:length(stock_namelist)) {
stock_symbol <- stock_namelist[stock_index]
cat("Downloading:", stock_symbol, "\n")
tryCatch({
# Download stock data
stock_data <- getSymbols(stock_symbol,
from = begin_date,
to = end_date,
auto.assign = FALSE)
# Extract adjusted close prices
adj_close <- Ad(stock_data)
# Combine with existing data
data_set <- cbind(data_set, adj_close)
}, error = function(e) {
cat("Error downloading", stock_symbol, ":", e$message, "\n")
cat("Skipping", stock_symbol, "\n")
})
}
## Downloading: AAPL
## Downloading: AMD
## Downloading: ADI
## Downloading: ABBV
## Downloading: AEZS
## Warning: Failed to open
## 'https://query2.finance.yahoo.com/v8/finance/chart/AEZS?period1=1451606400&period2=1514678400&interval=1d':
## The requested URL returned error: 404
## Error downloading AEZS : Unable to import "AEZS".
## cannot open the connection
## Skipping AEZS
## Downloading: A
## Downloading: APD
## Downloading: AA
## Downloading: CF
# Set column names to stock symbols
if (ncol(data_set) > 0) {
# Only use names for successfully downloaded stocks
successful_stocks <- stock_namelist[1:ncol(data_set)]
colnames(data_set) <- successful_stocks
# Set index class to Date
indexClass(data_set) <- "Date"
# Display structure and data
cat("\nData structure:\n")
str(data_set)
cat("\nFirst 6 rows:\n")
print(head(data_set))
cat("\nLast 6 rows:\n")
print(tail(data_set))
} else {
cat("No data was successfully downloaded.\n")
}
## Warning in `indexClass<-`(`*tmp*`, value = "Date"): 'indexClass<-' is deprecated.
## Use 'tclass<-' instead.
## See help("Deprecated") and help("xts-deprecated").
##
## Data structure:
## An xts object on 2016-01-04 / 2017-12-29 containing:
## Data: double [503, 8]
## Columns: AAPL, AMD, ADI, ABBV, AEZS ... with 3 more columns
## Index: Date [503] (TZ: "UTC")
##
## First 6 rows:
## AAPL AMD ADI ABBV AEZS A APD AA
## 2016-01-04 23.80316 2.77 45.21426 38.61563 37.79136 95.48467 22.11100 30.82980
## 2016-01-05 23.20667 2.75 44.88205 38.45474 37.66133 93.77734 21.10906 29.86305
## 2016-01-06 22.75252 2.51 42.97182 38.46144 37.82851 91.49350 19.60615 27.28249
## 2016-01-07 21.79226 2.28 41.87550 38.34750 36.22176 88.42622 18.83192 25.98838
## 2016-01-08 21.90749 2.14 41.51007 37.30184 35.84096 87.96798 18.37649 25.74479
## 2016-01-11 22.26223 2.34 42.49841 36.11541 35.23726 88.31535 18.21709 24.70191
##
## Last 6 rows:
## AAPL AMD ADI ABBV AEZS A APD AA
## 2017-12-21 41.07221 10.89 77.36378 70.65285 63.82708 136.2600 47.08080 34.11665
## 2017-12-22 41.07221 10.54 77.53831 70.86934 63.66638 136.4771 48.04182 34.49710
## 2017-12-26 40.03021 10.46 77.34632 70.53738 63.57184 136.1264 48.41663 35.43996
## 2017-12-27 40.03725 10.53 77.75650 70.78273 63.61911 136.6859 49.81973 35.62191
## 2017-12-28 40.14990 10.55 78.00085 70.56626 63.76089 137.4793 52.03009 35.24146
## 2017-12-29 39.71573 10.28 77.69540 69.78693 63.44732 137.8237 51.77061 35.18356
# Download S&P 500 index data
cat("\nDownloading S&P 500 index data...\n")
##
## Downloading S&P 500 index data...
tryCatch({
SP500_index <- getSymbols("^GSPC",
from = begin_date,
to = end_date,
auto.assign = FALSE)
# Extract adjusted close
SP500_index <- Ad(SP500_index)
colnames(SP500_index) <- "index"
cat("S&P 500 data downloaded successfully.\n")
cat("\nFirst 6 rows of S&P 500:\n")
print(head(SP500_index))
# Plot S&P 500 index
plot(SP500_index, main = "S&P 500 Index (2016-2017)",
ylab = "Index Value", xlab = "Date")
}, error = function(e) {
cat("Error downloading S&P 500 data:", e$message, "\n")
})
## S&P 500 data downloaded successfully.
##
## First 6 rows of S&P 500:
## index
## 2016-01-04 2012.66
## 2016-01-05 2016.71
## 2016-01-06 1990.26
## 2016-01-07 1943.09
## 2016-01-08 1922.03
## 2016-01-11 1923.67

# Additional: Plot individual stocks if data is available
if (exists("data_set") && ncol(data_set) > 0) {
# Plot all stocks
plot(data_set, main = "Stock Prices (2016-2017)",
legend.loc = "topleft",
ylab = "Adjusted Close Price")
}

cat("\nScript completed successfully!\n")
##
## Script completed successfully!
# Simple and direct approach - closest to your original code
# Assumes data_set and SP500_index are already loaded
# Check if data exists
if (!exists("data_set") || !exists("SP500_index")) {
stop("Please load data_set and SP500_index first")
}
# Compute the log-returns of the stocks and of the SP500 index as explicit factor
X <- diff(log(data_set), na.pad = FALSE)
f <- diff(log(SP500_index), na.pad = FALSE)
# Get dimensions
N <- ncol(X) # number of stocks
T <- nrow(X) # number of days
# Display structure
cat("Stock log returns structure:\n")
## Stock log returns structure:
str(X)
## An xts object on 2016-01-05 / 2017-12-29 containing:
## Data: double [502, 8]
## Columns: AAPL, AMD, ADI, ABBV, AEZS ... with 3 more columns
## Index: Date [502] (TZ: "UTC")
cat("\nS&P 500 log returns structure:\n")
##
## S&P 500 log returns structure:
str(f)
## An xts object on 2016-01-05 / 2017-12-29 containing:
## Data: double [502, 1]
## Columns: index
## Index: Date [502] (TZ: "UTC")
## xts Attributes:
## $ src : chr "yahoo"
## $ updated: POSIXct[1:1], format: "2025-05-30 10:58:22"
cat("\nDimensions:\n")
##
## Dimensions:
cat("Number of stocks (N):", N, "\n")
## Number of stocks (N): 8
cat("Number of trading days (T):", T, "\n")
## Number of trading days (T): 502
# Display first few rows
cat("\nFirst 6 rows of X:\n")
##
## First 6 rows of X:
print(head(X))
## AAPL AMD ADI ABBV AEZS
## 2016-01-05 -0.025378479 -0.007246402 -0.007374599 -0.0041751168 -0.003446405
## 2016-01-06 -0.019763876 -0.091318162 -0.043493271 0.0001742786 0.004428987
## 2016-01-07 -0.043120971 -0.096107319 -0.025843648 -0.0029670740 -0.043403026
## 2016-01-08 0.005273577 -0.063369552 -0.008764856 -0.0276466337 -0.010568518
## 2016-01-11 0.016062767 0.089345015 0.023530489 -0.0323228539 -0.016987508
## 2016-01-12 0.014409023 0.021142517 0.009336834 0.0176606192 0.006568018
## A APD AA
## 2016-01-05 -0.018042530 -0.046372887 -0.031859921
## 2016-01-06 -0.024655279 -0.073859084 -0.090376989
## 2016-01-07 -0.034099354 -0.040289948 -0.048595278
## 2016-01-08 -0.005195696 -0.024480919 -0.009417288
## 2016-01-11 0.003941006 -0.008711807 -0.041351744
## 2016-01-12 0.014292056 -0.094310868 0.002769633
cat("\nFirst 6 rows of f:\n")
##
## First 6 rows of f:
print(head(f))
## index
## 2016-01-05 0.0020102041
## 2016-01-06 -0.0132021630
## 2016-01-07 -0.0239858165
## 2016-01-08 -0.0108975374
## 2016-01-11 0.0008529083
## 2016-01-12 0.0077725141
# Check for any issues
if (any(is.na(X))) {
cat("Warning: X contains", sum(is.na(X)), "NA values\n")
}
if (any(is.na(f))) {
cat("Warning: f contains", sum(is.na(f)), "NA values\n")
}
cat("\nLog returns calculation completed!\n")
##
## Log returns calculation completed!
# Simple and direct approach - closest to your original code
# Assumes data_set and SP500_index are already loaded
# Check if data exists
if (!exists("data_set") || !exists("SP500_index")) {
stop("Please load data_set and SP500_index first")
}
# Compute the log-returns of the stocks and of the SP500 index as explicit factor
X <- diff(log(data_set), na.pad = FALSE)
f <- diff(log(SP500_index), na.pad = FALSE)
# Get dimensions
N <- ncol(X) # number of stocks
T <- nrow(X) # number of days
# Display structure
cat("Stock log returns structure:\n")
## Stock log returns structure:
str(X)
## An xts object on 2016-01-05 / 2017-12-29 containing:
## Data: double [502, 8]
## Columns: AAPL, AMD, ADI, ABBV, AEZS ... with 3 more columns
## Index: Date [502] (TZ: "UTC")
cat("\nS&P 500 log returns structure:\n")
##
## S&P 500 log returns structure:
str(f)
## An xts object on 2016-01-05 / 2017-12-29 containing:
## Data: double [502, 1]
## Columns: index
## Index: Date [502] (TZ: "UTC")
## xts Attributes:
## $ src : chr "yahoo"
## $ updated: POSIXct[1:1], format: "2025-05-30 10:58:22"
cat("\nDimensions:\n")
##
## Dimensions:
cat("Number of stocks (N):", N, "\n")
## Number of stocks (N): 8
cat("Number of trading days (T):", T, "\n")
## Number of trading days (T): 502
# Display first few rows
cat("\nFirst 6 rows of X:\n")
##
## First 6 rows of X:
print(head(X))
## AAPL AMD ADI ABBV AEZS
## 2016-01-05 -0.025378479 -0.007246402 -0.007374599 -0.0041751168 -0.003446405
## 2016-01-06 -0.019763876 -0.091318162 -0.043493271 0.0001742786 0.004428987
## 2016-01-07 -0.043120971 -0.096107319 -0.025843648 -0.0029670740 -0.043403026
## 2016-01-08 0.005273577 -0.063369552 -0.008764856 -0.0276466337 -0.010568518
## 2016-01-11 0.016062767 0.089345015 0.023530489 -0.0323228539 -0.016987508
## 2016-01-12 0.014409023 0.021142517 0.009336834 0.0176606192 0.006568018
## A APD AA
## 2016-01-05 -0.018042530 -0.046372887 -0.031859921
## 2016-01-06 -0.024655279 -0.073859084 -0.090376989
## 2016-01-07 -0.034099354 -0.040289948 -0.048595278
## 2016-01-08 -0.005195696 -0.024480919 -0.009417288
## 2016-01-11 0.003941006 -0.008711807 -0.041351744
## 2016-01-12 0.014292056 -0.094310868 0.002769633
cat("\nFirst 6 rows of f:\n")
##
## First 6 rows of f:
print(head(f))
## index
## 2016-01-05 0.0020102041
## 2016-01-06 -0.0132021630
## 2016-01-07 -0.0239858165
## 2016-01-08 -0.0108975374
## 2016-01-11 0.0008529083
## 2016-01-12 0.0077725141
# Check for any issues
if (any(is.na(X))) {
cat("Warning: X contains", sum(is.na(X)), "NA values\n")
}
if (any(is.na(f))) {
cat("Warning: f contains", sum(is.na(f)), "NA values\n")
}
cat("\nLog returns calculation completed!\n")
##
## Log returns calculation completed!
F_ <- cbind(ones = 1, f)
Gamma <- t(X) %*% F_ %*% solve(t(F_) %*% F_) # better: Gamma <- t(solve(t(F_) %*% F_, t(F_) %*% X))
colnames(Gamma) <- c("alpha", "beta")
alpha <- Gamma[, 1] # or alpha <- Gamma[, "alpha"]
beta <- Gamma[, 2] # or beta <- Gamma[, "beta"]
print(Gamma)
## alpha beta
## AAPL 0.0003999083 1.0957928
## AMD 0.0013825598 2.1738305
## ADI 0.0003609969 1.2683033
## ABBV 0.0006684638 0.9022737
## AEZS 0.0002810625 1.3277210
## A 0.0001501081 1.0270552
## APD 0.0006429138 1.8593529
## AA -0.0006256995 1.5712735
#> alpha beta
#> AAPL 0.0003999086 1.0957919
#> AMD 0.0013825599 2.1738304
#> ADI 0.0003609968 1.2683047
#> ABBV 0.0006684632 0.9022748
#> AEZS -0.0022091301 1.7115761
#> A 0.0002810616 1.3277212
#> APD 0.0001786375 1.0239453
#> AA 0.0006429140 1.8593524
#> CF -0.0006029705 1.5702493
E <- xts(t(t(X) - Gamma %*% t(F_)), index(X)) # residuals
Psi <- (1/(T-2)) * t(E) %*% E
Sigma <- as.numeric(var(f)) * beta %o% beta + diag(diag(Psi))
# Manual Factor Model Implementation using Base R
# No external packages required beyond what we already have
# Check if required data exists
if (!exists("X") || !exists("f")) {
stop("Log returns data (X and f) not found. Please run the log returns calculation first.")
}
cat("Fitting factor model manually...\n")
## Fitting factor model manually...
cat("Stock returns (X) dimensions:", dim(X), "\n")
## Stock returns (X) dimensions: 502 8
cat("Factor returns (f) dimensions:", dim(f), "\n")
## Factor returns (f) dimensions: 502 1
# Ensure data alignment and remove any NA values
if (nrow(X) != nrow(f)) {
cat("Aligning data dimensions...\n")
min_rows <- min(nrow(X), nrow(f))
X <- X[1:min_rows, ]
f <- f[1:min_rows, ]
}
# Convert to matrices for easier computation
X_matrix <- as.matrix(X)
f_vector <- as.vector(f)
# Check for and remove any NA values
complete_obs <- complete.cases(cbind(X_matrix, f_vector))
X_matrix <- X_matrix[complete_obs, ]
f_vector <- f_vector[complete_obs]
cat("Final data dimensions after cleaning:\n")
## Final data dimensions after cleaning:
cat("X:", dim(X_matrix), "\n")
## X: 502 8
cat("f:", length(f_vector), "\n")
## f: 502
# Initialize results storage
n_stocks <- ncol(X_matrix)
stock_names <- colnames(X_matrix)
# Storage for results
alpha <- numeric(n_stocks)
beta <- numeric(n_stocks)
r_squared <- numeric(n_stocks)
residual_sd <- numeric(n_stocks)
cat("\nFitting individual regressions for each stock...\n")
##
## Fitting individual regressions for each stock...
# Fit factor model for each stock: R_i = alpha_i + beta_i * R_market + epsilon_i
for (i in 1:n_stocks) {
# Get stock returns for stock i
stock_returns <- X_matrix[, i]
# Fit linear regression: stock_returns ~ market_returns
model <- lm(stock_returns ~ f_vector)
# Extract coefficients
alpha[i] <- model$coefficients[1] # Intercept (alpha)
beta[i] <- model$coefficients[2] # Slope (beta)
# Extract model statistics
r_squared[i] <- summary(model)$r.squared
residual_sd[i] <- summary(model)$sigma
cat("Stock", i, "of", n_stocks, "completed\n")
}
## Stock 1 of 8 completed
## Stock 2 of 8 completed
## Stock 3 of 8 completed
## Stock 4 of 8 completed
## Stock 5 of 8 completed
## Stock 6 of 8 completed
## Stock 7 of 8 completed
## Stock 8 of 8 completed
# Create results table
results <- data.frame(
Stock = stock_names,
Alpha = alpha,
Beta = beta,
R_squared = r_squared,
Residual_SD = residual_sd,
stringsAsFactors = FALSE
)
# Display results in the format similar to the original code
cat("\nAlpha and Beta estimates:\n")
##
## Alpha and Beta estimates:
alpha_beta_matrix <- cbind(alpha = alpha, beta = beta)
rownames(alpha_beta_matrix) <- stock_names
print(alpha_beta_matrix)
## alpha beta
## AAPL 0.0003999083 1.0957928
## AMD 0.0013825598 2.1738305
## ADI 0.0003609969 1.2683033
## ABBV 0.0006684638 0.9022737
## AEZS 0.0002810625 1.3277210
## A 0.0001501081 1.0270552
## APD 0.0006429138 1.8593529
## AA -0.0006256995 1.5712735
cat("\nDetailed Results:\n")
##
## Detailed Results:
print(results)
## Stock Alpha Beta R_squared Residual_SD
## 1 AAPL 0.0003999083 1.0957928 0.3010341 0.010903540
## 2 AMD 0.0013825598 2.1738305 0.1037188 0.041729004
## 3 ADI 0.0003609969 1.2683033 0.3574406 0.011104440
## 4 ABBV 0.0006684638 0.9022737 0.1676340 0.013129048
## 5 AEZS 0.0002810625 1.3277210 0.4585150 0.009421980
## 6 A 0.0001501081 1.0270552 0.4165524 0.007937409
## 7 APD 0.0006429138 1.8593529 0.2056430 0.023863361
## 8 AA -0.0006256995 1.5712735 0.1339533 0.026089428
# Summary statistics
cat("\nSummary Statistics:\n")
##
## Summary Statistics:
cat("Average Alpha:", mean(alpha), "\n")
## Average Alpha: 0.0004075392
cat("Average Beta:", mean(beta), "\n")
## Average Beta: 1.4032
cat("Average R-squared:", mean(r_squared), "\n")
## Average R-squared: 0.2680614
# Interpretation
cat("\nInterpretation:\n")
##
## Interpretation:
cat("Alpha > 0: Stock outperforms the market (after adjusting for systematic risk)\n")
## Alpha > 0: Stock outperforms the market (after adjusting for systematic risk)
cat("Beta > 1: Stock is more volatile than the market\n")
## Beta > 1: Stock is more volatile than the market
cat("Beta < 1: Stock is less volatile than the market\n")
## Beta < 1: Stock is less volatile than the market
cat("Beta = 1: Stock moves with the market\n")
## Beta = 1: Stock moves with the market
# Identify interesting stocks
high_alpha_stocks <- stock_names[alpha > 0.001]
high_beta_stocks <- stock_names[beta > 1.5]
low_beta_stocks <- stock_names[beta < 0.8]
if (length(high_alpha_stocks) > 0) {
cat("\nStocks with high alpha (> 0.1%):", paste(high_alpha_stocks, collapse = ", "), "\n")
}
##
## Stocks with high alpha (> 0.1%): AMD
if (length(high_beta_stocks) > 0) {
cat("High beta stocks (> 1.5):", paste(high_beta_stocks, collapse = ", "), "\n")
}
## High beta stocks (> 1.5): AMD, APD, AA
if (length(low_beta_stocks) > 0) {
cat("Low beta stocks (< 0.8):", paste(low_beta_stocks, collapse = ", "), "\n")
}
# Create visualizations
par(mfrow = c(2, 2))
# 1. Beta coefficients
barplot(beta,
names.arg = stock_names,
main = "Beta Coefficients",
ylab = "Beta",
las = 2,
col = "steelblue")
abline(h = 1, col = "red", lty = 2, lwd = 2)
legend("topright", "Market Beta = 1", col = "red", lty = 2)
# 2. Alpha coefficients
barplot(alpha,
names.arg = stock_names,
main = "Alpha Coefficients",
ylab = "Alpha",
las = 2,
col = "darkgreen")
abline(h = 0, col = "red", lty = 2, lwd = 2)
legend("topright", "Zero Alpha", col = "red", lty = 2)
# 3. R-squared values
barplot(r_squared,
names.arg = stock_names,
main = "R-squared (Explanatory Power)",
ylab = "R-squared",
las = 2,
col = "orange")
# 4. Scatter plot: Beta vs Alpha
plot(beta, alpha,
pch = 19,
col = "blue",
main = "Alpha vs Beta",
xlab = "Beta",
ylab = "Alpha")
text(beta, alpha, labels = stock_names, pos = 3, cex = 0.8)
abline(h = 0, v = 1, col = "red", lty = 2)

par(mfrow = c(1, 1))
# Store results in global environment for further use
factor_model <- list(
alpha = alpha,
beta = beta,
r.squared = r_squared,
residual.sd = residual_sd,
stock.names = stock_names,
results = results
)
cat("\nFactor model analysis completed successfully!\n")
##
## Factor model analysis completed successfully!
cat("Results stored in 'factor_model' list and 'results' data frame.\n")
## Results stored in 'factor_model' list and 'results' data frame.
library(corrplot) #install.packages("corrplot")
## corrplot 0.95 loaded
corrplot(cov2cor(Sigma), mar = c(0,0,1,0),
main = "Covariance matrix of log-returns from 1-factor model")

corrplot(cov2cor(Psi), mar = c(0,0,1,0), order = "hclust", addrect = 3,
main = "Covariance matrix of residuals")

cbind(stock_namelist, sector_namelist) # to recall the sectors
## stock_namelist sector_namelist
## [1,] "AAPL" "Information Technology"
## [2,] "AMD" "Information Technology"
## [3,] "ADI" "Information Technology"
## [4,] "ABBV" "Health Care"
## [5,] "AEZS" "Health Care"
## [6,] "A" "Health Care"
## [7,] "APD" "Materials"
## [8,] "AA" "Materials"
## [9,] "CF" "Materials"
#> stock_namelist sector_namelist
#> [1,] "AAPL" "Information Technology"
#> [2,] "AMD" "Information Technology"
#> [3,] "ADI" "Information Technology"
#> [4,] "ABBV" "Health Care"
#> [5,] "AEZS" "Health Care"
#> [6,] "A" "Health Care"
#> [7,] "APD" "Materials"
#> [8,] "AA" "Materials"
#> [9,] "CF" "Materials"
library(xts)
library(quantmod)
# set begin-end date and stock namelist
begin_date <- "2016-10-01"
end_date <- "2017-06-30"
stock_namelist <- c("SPY","XIVH", "SPHB", "SPLV", "USMV", "JKD")
# download data from YahooFinance
data_set <- xts()
for (stock_index in 1:length(stock_namelist))
data_set <- cbind(data_set, Ad(getSymbols(stock_namelist[stock_index],
from = begin_date, to = end_date, auto.assign = FALSE)))
colnames(data_set) <- stock_namelist
indexClass(data_set) <- "Date"
## Warning in `indexClass<-`(`*tmp*`, value = "Date"): 'indexClass<-' is deprecated.
## Use 'tclass<-' instead.
## See help("Deprecated") and help("xts-deprecated").
head(data_set)
## SPY XIVH SPHB SPLV USMV JKD
## 2016-10-03 187.3318 29.400 29.48051 34.29298 38.58932 30.94241
## 2016-10-04 186.3768 30.160 29.39980 33.89275 38.21291 30.82150
## 2016-10-05 187.2016 30.160 29.96482 33.81774 38.12738 30.95934
## 2016-10-06 187.3318 30.160 29.90205 33.87609 38.15304 30.99561
## 2016-10-07 186.6893 30.670 29.66886 33.78436 38.11028 30.93032
## 2016-10-10 187.6617 31.394 29.94689 33.95948 38.29847 31.12861
#> SPY XIVH SPHB SPLV USMV JKD
#> 2016-10-03 203.6610 29.400 31.38322 38.55683 42.88382 119.8765
#> 2016-10-04 202.6228 30.160 31.29729 38.10687 42.46553 119.4081
#> 2016-10-05 203.5195 30.160 31.89880 38.02249 42.37048 119.9421
#> 2016-10-06 203.6610 30.160 31.83196 38.08813 42.39899 120.0826
#> 2016-10-07 202.9626 30.670 31.58372 37.98500 42.35146 119.8296
#> 2016-10-10 204.0197 31.394 31.87970 38.18187 42.56060 120.5978
SP500_index <- Ad(getSymbols("^GSPC", from = begin_date, to = end_date, auto.assign = FALSE))
colnames(SP500_index) <- "index"
head(SP500_index)
## index
## 2016-10-03 2161.20
## 2016-10-04 2150.49
## 2016-10-05 2159.73
## 2016-10-06 2160.77
## 2016-10-07 2153.74
## 2016-10-10 2163.66
#> index
#> 2016-10-03 2161.20
#> 2016-10-04 2150.49
#> 2016-10-05 2159.73
#> 2016-10-06 2160.77
#> 2016-10-07 2153.74
#> 2016-10-10 2163.66
# compute the log-returns of the stocks and of the SP500 index as explicit factor
X <- diff(log(data_set), na.pad = FALSE)
N <- ncol(X) # number of stocks
T <- nrow(X) # number of days
f <- diff(log(SP500_index), na.pad = FALSE)
F_ <- cbind(ones = 1, f)
Gamma <- t(solve(t(F_) %*% F_, t(F_) %*% X))
colnames(Gamma) <- c("alpha", "beta")
alpha <- Gamma[, 1]
beta <- Gamma[, 2]
print(Gamma)
## alpha beta
## SPY 7.142366e-05 1.0071399
## XIVH 1.810392e-03 2.4971086
## SPHB -2.422129e-04 1.5613538
## SPLV 1.070942e-04 0.6777087
## USMV 1.166175e-04 0.6511664
## JKD 1.628182e-04 0.9047726
#> alpha beta
#> SPY 7.142225e-05 1.0071424
#> XIVH 1.810392e-03 2.4971086
#> SPHB -2.422107e-04 1.5613533
#> SPLV 1.070918e-04 0.6777149
#> USMV 1.166177e-04 0.6511667
#> JKD 2.569578e-04 0.8883843
{ par(mfrow = c(1,2)) # two plots side by side
barplot(rev(alpha), horiz = TRUE, main = "alpha", col = "red", cex.names = 0.75, las = 1)
barplot(rev(beta), horiz = TRUE, main = "beta", col = "blue", cex.names = 0.75, las = 1)
par(mfrow = c(1,1)) } # reset to normal single plot

idx_sorted <- sort(alpha/beta, decreasing = TRUE, index.return = TRUE)$ix
SR <- colMeans(X)/sqrt(diag(var(X)))
ranking <- cbind("alpha/beta" = (alpha/beta)[idx_sorted],
SR = SR[idx_sorted],
alpha = alpha[idx_sorted],
beta = beta[idx_sorted])
print(ranking)
## alpha/beta SR alpha beta
## XIVH 7.249952e-04 0.13919483 1.810392e-03 2.4971086
## JKD 1.799548e-04 0.15386030 1.628182e-04 0.9047726
## USMV 1.790901e-04 0.12280069 1.166175e-04 0.6511664
## SPLV 1.580239e-04 0.10887811 1.070942e-04 0.6777087
## SPY 7.091732e-05 0.14170622 7.142366e-05 1.0071399
## SPHB -1.551301e-04 0.07401545 -2.422129e-04 1.5613538
#> alpha/beta SR alpha beta
#> XIVH 7.249952e-04 0.13919483 1.810392e-03 2.4971086
#> JKD 2.892417e-04 0.17682677 2.569578e-04 0.8883843
#> USMV 1.790904e-04 0.12280053 1.166177e-04 0.6511667
#> SPLV 1.580189e-04 0.10887903 1.070918e-04 0.6777149
#> SPY 7.091574e-05 0.14170591 7.142225e-05 1.0071424
#> SPHB -1.551287e-04 0.07401566 -2.422107e-04 1.5613533
# Alternative approach: Create proxy factors using available data
library(xts)
library(quantmod)
cat("Creating alternative factor data...\n")
## Creating alternative factor data...
# Set date range to match your stock data
begin_date <- "2013-01-01"
end_date <- "2017-08-31"
# Download market indices and create proxy factors
tryCatch({
cat("Downloading market data for factor construction...\n")
# S&P 500 for market factor
sp500 <- getSymbols("^GSPC", from = begin_date, to = end_date, auto.assign = FALSE)
sp500_returns <- diff(log(Ad(sp500)), na.pad = FALSE)
# Russell 2000 for size factor proxy
russell2000 <- getSymbols("^RUT", from = begin_date, to = end_date, auto.assign = FALSE)
russell_returns <- diff(log(Ad(russell2000)), na.pad = FALSE)
# Use treasury rate as risk-free rate proxy (use 3-month treasury)
# Note: This is a simplified approach
rf_rate <- 0.01 / 252 # Assume 1% annual risk-free rate, convert to daily
# Create proxy factors
# Market factor (Mkt-RF): Market return minus risk-free rate
mkt_rf <- sp500_returns - rf_rate
# Size factor (SMB): Difference between small and large cap returns
# Simplified: Use Russell 2000 vs S&P 500 as proxy
smb <- russell_returns - sp500_returns
# Value factor (HML): Simplified proxy (usually requires more complex construction)
# For simplicity, create a synthetic factor with some autocorrelation
set.seed(123) # For reproducible results
hml_values <- rnorm(nrow(sp500_returns), mean = 0, sd = 0.005)
hml <- xts(hml_values, order.by = index(sp500_returns))
# Combine into factor matrix
F <- cbind(mkt_rf, smb, hml)
colnames(F) <- c("Mkt.RF", "SMB", "HML")
cat("Alternative factors created successfully\n")
cat("Factor dimensions:", dim(F), "\n")
# Display sample
cat("\nFirst 6 rows of alternative factors:\n")
print(head(F))
cat("\nLast 6 rows of alternative factors:\n")
print(tail(F))
# Summary statistics
cat("\nFactor summary statistics:\n")
print(summary(as.data.frame(F)))
cat("\nAlternative factor data ready for analysis!\n")
cat("Note: These are proxy factors, not the official Fama-French factors\n")
}, error = function(e) {
cat("Error creating alternative factors:", e$message, "\n")
stop("Failed to create alternative factor data")
})
## Downloading market data for factor construction...
## Alternative factors created successfully
## Factor dimensions: 1174 3
##
## First 6 rows of alternative factors:
## Mkt.RF SMB HML
## 2013-01-03 -0.002127478 0.0011485079 -0.0028023782
## 2013-01-04 0.004813617 0.0026250267 -0.0011508874
## 2013-01-07 -0.003167686 -0.0006898157 0.0077935416
## 2013-01-08 -0.003287322 0.0019908831 0.0003525420
## 2013-01-09 0.002612663 0.0028316156 0.0006464387
## 2013-01-10 0.007529018 -0.0056036500 0.0085753249
##
## Last 6 rows of alternative factors:
## Mkt.RF SMB HML
## 2017-08-23 -0.0034992527 0.0021462792 -0.0064435812
## 2017-08-24 -0.0021162993 0.0050945404 0.0074120136
## 2017-08-25 0.0016317891 0.0009235993 0.0019257741
## 2017-08-28 0.0004472709 0.0029772411 0.0067082015
## 2017-08-29 0.0008027842 0.0002060658 -0.0047858524
## 2017-08-30 0.0045648492 0.0009017110 0.0008339064
##
## Factor summary statistics:
## Mkt.RF SMB HML
## Min. :-0.0402511 Min. :-1.629e-02 Min. :-1.405e-02
## 1st Qu.:-0.0029987 1st Qu.:-3.383e-03 1st Qu.:-3.169e-03
## Median : 0.0003806 Median : 3.938e-05 Median : 9.928e-05
## Mean : 0.0004025 Mean :-4.557e-05 Mean : 1.112e-04
## 3rd Qu.: 0.0046251 3rd Qu.: 3.103e-03 3rd Qu.: 3.299e-03
## Max. : 0.0382516 Max. : 2.569e-02 Max. : 1.621e-02
##
## Alternative factor data ready for analysis!
## Note: These are proxy factors, not the official Fama-French factors
K <- 3
alpha <- colMeans(X)
X_ <- X - matrix(alpha, T, N, byrow = TRUE)
Sigma_prev <- matrix(0, N, N)
Sigma <- (1/(T-1)) * t(X_) %*% X_
eigSigma <- eigen(Sigma)
while (norm(Sigma - Sigma_prev, "F")/norm(Sigma, "F") > 1e-3) {
B <- eigSigma$vectors[, 1:K, drop = FALSE] %*% diag(sqrt(eigSigma$values[1:K]), K, K)
Psi <- diag(diag(Sigma - B %*% t(B)))
Sigma_prev <- Sigma
Sigma <- B %*% t(B) + Psi
eigSigma <- eigen(Sigma - Psi)
}
cbind(alpha, B)
## alpha
## SPY 0.0006831797 -0.002764581 -0.003632401 -1.163576e-03
## XIVH 0.0033271831 -0.023771161 0.002506272 2.260428e-05
## SPHB 0.0007061831 -0.004793759 -0.007496172 3.402997e-03
## SPLV 0.0005187473 -0.001244995 -0.002399070 -3.741520e-03
## USMV 0.0005121483 -0.001296234 -0.002229389 -3.175177e-03
## JKD 0.0007123943 -0.002694075 -0.002866826 -1.803859e-03
#> alpha
#> AAPL 0.0007074564 0.0002732114 -0.004631647 -0.0044814226
#> AMD 0.0013722468 0.0045782146 -0.035202146 0.0114549515
#> ADI 0.0006533116 0.0004151904 -0.007379066 -0.0053058139
#> ABBV 0.0007787929 0.0017513359 -0.003967816 -0.0056000810
#> AEZS -0.0041576357 0.0769496344 0.002935950 0.0006249473
#> A 0.0006902482 0.0012690079 -0.005680162 -0.0061507654
#> APD 0.0006236565 0.0005442926 -0.004229364 -0.0057976394
#> AA 0.0006277163 0.0027405024 -0.009796620 -0.0149177957
#> CF -0.0000573028 0.0023108605 -0.007409061 -0.0153425661
# Your approach was already working well
statistical_factor_model <- function(X, K) {
# Step 1: Get factors via PCA
pca <- prcomp(X, center = TRUE, scale. = FALSE)
F <- pca$x[, 1:K] # First K principal components
# Step 2: Estimate loadings (your working code)
F_ <- cbind(ones = 1, F)
Gamma <- t(solve(t(F_) %*% F_, t(F_) %*% X))
colnames(Gamma) <- c("alpha", paste0("factor", 1:K))
return(list(
alpha = Gamma[, 1],
beta = Gamma[, -1],
factors = F,
loadings = Gamma
))
}
# Use it
K <- 3
factor_model <- statistical_factor_model(X, K)
cbind(alpha = factor_model$alpha, beta = factor_model$beta)
## alpha factor1 factor2 factor3
## SPY 0.0006831797 0.11228681 0.3734384 -0.183360963
## XIVH 0.0033271831 0.96549450 -0.2576638 0.003562074
## SPHB 0.0007061831 0.19470434 0.7706634 0.536257944
## SPLV 0.0005187473 0.05056699 0.2466426 -0.589603864
## USMV 0.0005121483 0.05264810 0.2291981 -0.500357262
## JKD 0.0007123943 0.10942313 0.2947315 -0.284259356