This guide provides a complete R workflow for analyzing stock price trends, correlations, and market indices using Yahoo Finance data. The analysis includes exploratory data analysis (EDA), correlation analysis, risk metrics, technical indicators, and performance evaluation.
# Install required packages (run once)
if (!require("quantmod")) install.packages("quantmod")
if (!require("tidyquant")) install.packages("tidyquant")
if (!require("dplyr")) install.packages("dplyr")
if (!require("ggplot2")) install.packages("ggplot2")
if (!require("corrplot")) install.packages("corrplot")
if (!require("PerformanceAnalytics")) install.packages("PerformanceAnalytics")
if (!require("xts")) install.packages("xts")
if (!require("zoo")) install.packages("zoo")
# Load libraries
library(quantmod)
library(tidyquant)
library(dplyr)
library(ggplot2)
library(corrplot)
library(PerformanceAnalytics)
library(xts)
library(zoo)
# Method 1: Using quantmod
getSymbols(c("AAPL", "MSFT", "GOOGL", "^GSPC"),
from = "2020-01-01",
to = Sys.Date(),
src = "yahoo")
# Method 2: Using tidyquant (more modern approach)
stock_prices <- c("AAPL", "MSFT", "GOOGL") %>%
tq_get(get = "stock.prices",
from = "2020-01-01",
to = Sys.Date())
market_index <- "^GSPC" %>%
tq_get(get = "stock.prices",
from = "2020-01-01",
to = Sys.Date())
# Select adjusted closing prices
aapl_prices <- Ad(AAPL) # Using quantmod
msft_prices <- Ad(MSFT)
googl_prices <- Ad(GOOGL)
gspc_prices <- Ad(GSPC)
# Combine into single xts object
prices_xts <- merge(aapl_prices, msft_prices, googl_prices, gspc_prices)
colnames(prices_xts) <- c("AAPL", "MSFT", "GOOGL", "GSPC")
# Calculate daily returns (percentage change)
daily_returns <- ROC(prices_xts, type = "discrete") * 100 # in percentage
daily_returns <- na.omit(daily_returns)
# Or using dplyr approach
returns_df <- stock_prices %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "daily",
col_rename = "returns")
# Descriptive statistics
summary_stats <- function(returns_data) {
stats <- data.frame(
Mean = colMeans(returns_data, na.rm = TRUE),
StdDev = apply(returns_data, 2, sd, na.rm = TRUE),
Min = apply(returns_data, 2, min, na.rm = TRUE),
Max = apply(returns_data, 2, max, na.rm = TRUE),
Skewness = apply(returns_data, 2, skewness, na.rm = TRUE),
Kurtosis = apply(returns_data, 2, kurtosis, na.rm = TRUE)
)
return(stats)
}
summary_table <- summary_stats(daily_returns)
print(summary_table)
# Annualized metrics
annualized <- data.frame(
Annual_Return = colMeans(daily_returns) * 252,
Annual_Volatility = apply(daily_returns, 2, sd) * sqrt(252)
)
print(annualized)
# Plot price series with ggplot2
prices_df <- stock_prices %>%
select(date, symbol, adjusted) %>%
filter(symbol %in% c("AAPL", "MSFT", "GOOGL"))
ggplot(prices_df, aes(x = date, y = adjusted, color = symbol)) +
geom_line() +
facet_wrap(~symbol, scales = "free_y") +
labs(title = "Stock Price Trends (2020-Present)",
x = "Date", y = "Adjusted Price ($)") +
theme_minimal() +
theme(legend.position = "bottom")
# Candlestick chart using quantmod
chartSeries(AAPL,
from = "2023-01-01",
theme = chartTheme("white"),
up.col = "green",
dn.col = "red")
# Calculate cumulative returns
cum_returns <- cumprod(1 + daily_returns) - 1
# Plot cumulative returns
plot(cum_returns[, c("AAPL", "MSFT", "GOOGL")],
legend.loc = "topleft",
main = "Cumulative Returns",
ylab = "Return (%)",
col = c("blue", "red", "green"))
# Pearson correlation
cor_matrix <- cor(daily_returns[, c("AAPL", "MSFT", "GOOGL")], use = "complete.obs")
print(cor_matrix)
# Visualize correlation heatmap
corrplot(cor_matrix,
method = "color",
type = "upper",
diag = TRUE,
addCoef.col = "black",
tl.col = "black")
# With ggplot2 + reshape2
library(reshape2)
melted_corr <- melt(cor_matrix)
ggplot(melted_corr, aes(x = Var1, y = Var2, fill = value)) +
geom_tile() +
geom_text(aes(label = round(value, 2)), color = "white") +
scale_fill_gradient2(low = "blue", mid = "white", high = "red",
limits = c(-1, 1)) +
theme_minimal() +
labs(title = "Stock Correlation Matrix",
x = "", y = "")
# 60-day rolling correlation between AAPL and MSFT
rolling_corr <- zoo::rollapply(
daily_returns[, c("AAPL", "MSFT")],
width = 60,
FUN = function(x) cor(x)[1, 2],
by.column = FALSE
)
# Plot rolling correlation
plot(rolling_corr,
main = "60-Day Rolling Correlation: AAPL vs MSFT",
ylab = "Correlation",
xlab = "Date",
type = "l",
col = "blue")
abline(h = 0, lty = 2, col = "red")
# Calculate correlation with S&P 500
market_corr <- cor(daily_returns[, c("AAPL", "MSFT", "GOOGL")],
daily_returns[, "GSPC"],
use = "complete.obs")
print(market_corr)
# Interpretation:
# > 0.7: Strong positive correlation
# 0.3-0.7: Moderate correlation
# < 0.3: Weak correlation
# Beta = Covariance(Stock, Market) / Variance(Market)
calculate_beta <- function(stock_returns, market_returns) {
covariance <- cov(stock_returns, market_returns)
market_variance <- var(market_returns)
beta <- covariance / market_variance
return(beta)
}
# Calculate for each stock
betas <- sapply(
daily_returns[, c("AAPL", "MSFT", "GOOGL")],
function(x) calculate_beta(x, daily_returns[, "GSPC"])
)
print(betas)
# Interpretation:
# Beta > 1: More volatile than market (higher risk, higher potential return)
# Beta = 1: Moves with market
# Beta < 1: Less volatile than market (lower risk, lower potential return)
# Using linear regression model
beta_model <- lm(daily_returns[, "AAPL"] ~ daily_returns[, "GSPC"])
beta_coefficient <- coef(beta_model)[2] # Slope of regression line
alpha <- coef(beta_model)[1] # Intercept
print(paste("Beta:", round(beta_coefficient, 4)))
print(paste("Alpha:", round(alpha, 6)))
# Summary statistics
summary(beta_model)
# Historical volatility (annualized)
historical_vol <- apply(daily_returns[, c("AAPL", "MSFT", "GOOGL")], 2, sd) * sqrt(252)
print(historical_vol)
# Rolling volatility
rolling_vol <- zoo::rollapply(
daily_returns[, "AAPL"],
width = 30,
FUN = sd,
by.column = FALSE
) * sqrt(252)
plot(rolling_vol,
main = "30-Day Rolling Volatility (Annualized)",
ylab = "Volatility",
xlab = "Date",
type = "l",
col = "darkblue")
# Function to calculate maximum drawdown
max_drawdown <- function(returns) {
cum_returns <- cumprod(1 + returns)
running_max <- cummax(cum_returns)
drawdown <- (cum_returns - running_max) / running_max
return(min(drawdown))
}
# Apply to all stocks
drawdowns <- apply(daily_returns[, c("AAPL", "MSFT", "GOOGL")], 2, max_drawdown)
print(drawdowns)
# Visualization of drawdown period
cum_aapl <- cumprod(1 + daily_returns[, "AAPL"])
running_max_aapl <- cummax(cum_aapl)
drawdown_aapl <- (cum_aapl - running_max_aapl) / running_max_aapl
plot(drawdown_aapl,
main = "AAPL Drawdown Over Time",
ylab = "Drawdown",
xlab = "Date",
type = "l",
col = "red")
# Sortino Ratio = (Mean Return - Risk-Free Rate) / Downside Deviation
# Only penalizes downside volatility
sortino_ratio_calc <- function(returns, rf = 0.04/252) {
excess_returns <- returns - rf
downside_returns <- pmin(excess_returns, 0)
downside_deviation <- sqrt(mean(downside_returns^2))
sortino <- mean(excess_returns) / downside_deviation * sqrt(252)
return(sortino)
}
sortino_ratios <- apply(daily_returns[, c("AAPL", "MSFT", "GOOGL")], 2, sortino_ratio_calc)
print(sortino_ratios)
# Percentage of days with positive returns
win_rate <- colSums(daily_returns[, c("AAPL", "MSFT", "GOOGL")] > 0) / nrow(daily_returns)
print(win_rate)
# Average gain on positive days
avg_positive_return <- sapply(
daily_returns[, c("AAPL", "MSFT", "GOOGL")],
function(x) mean(x[x > 0], na.rm = TRUE)
)
print(avg_positive_return)
# Simple Moving Average (SMA)
sma_20 <- zoo::rollmean(prices_xts[, "AAPL"], k = 20)
sma_50 <- zoo::rollmean(prices_xts[, "AAPL"], k = 50)
sma_200 <- zoo::rollmean(prices_xts[, "AAPL"], k = 200)
# Using quantmod
sma_20_qm <- SMA(Ad(AAPL), n = 20)
ema_12_qm <- EMA(Ad(AAPL), n = 12)
# Plot with moving averages
plot(Ad(AAPL), main = "AAPL with Moving Averages")
lines(sma_20, col = "blue", lwd = 2)
lines(sma_50, col = "red", lwd = 2)
lines(sma_200, col = "green", lwd = 2)
legend("topleft", c("Price", "SMA-20", "SMA-50", "SMA-200"),
col = c("black", "blue", "red", "green"), lwd = c(1, 2, 2, 2))
# Golden Cross / Death Cross signals
latest_sma_20 <- sma_20[nrow(sma_20)]
latest_sma_50 <- sma_50[nrow(sma_50)]
latest_sma_200 <- sma_200[nrow(sma_200)]
if (!is.na(latest_sma_20) & !is.na(latest_sma_50) & !is.na(latest_sma_200)) {
if (latest_sma_20 > latest_sma_50 & latest_sma_50 > latest_sma_200) {
print("STRONG UPTREND - Golden Cross")
} else if (latest_sma_20 < latest_sma_50 & latest_sma_50 < latest_sma_200) {
print("STRONG DOWNTREND - Death Cross")
}
}
# RSI = 100 - (100 / (1 + RS))
# RS = Average Gain / Average Loss over period
calculate_rsi <- function(prices, period = 14) {
returns <- diff(log(prices))
gains <- pmax(returns, 0)
losses <- -pmin(returns, 0)
avg_gain <- zoo::rollmean(gains, k = period, fill = NA)
avg_loss <- zoo::rollmean(losses, k = period, fill = NA)
rs <- avg_gain / avg_loss
rsi <- 100 - (100 / (1 + rs))
return(rsi)
}
rsi_aapl <- calculate_rsi(Ad(AAPL), period = 14)
# Or using quantmod
rsi_qm <- RSI(Ad(AAPL), n = 14)
# Plot RSI
plot(rsi_aapl, main = "AAPL RSI (14)", ylab = "RSI", ylim = c(0, 100))
abline(h = 70, col = "red", lty = 2) # Overbought level
abline(h = 30, col = "green", lty = 2) # Oversold level
# Interpretation:
# RSI > 70: Overbought (potential sell signal)
# RSI < 30: Oversold (potential buy signal)
# MACD = EMA(12) - EMA(26)
# Signal Line = EMA(9) of MACD
macd <- MACD(Ad(AAPL), nFast = 12, nSlow = 26, nSig = 9)
plot(macd, main = "AAPL MACD")
# Signals:
# MACD > Signal Line: Bullish
# MACD < Signal Line: Bearish
# Bollinger Bands = SMA ± (2 * Standard Deviation)
bb <- BBands(Ad(AAPL), n = 20, sd = 2)
plot(Ad(AAPL), main = "AAPL Bollinger Bands")
lines(bb[, "dn"], col = "red", lty = 2)
lines(bb[, "mavg"], col = "blue")
lines(bb[, "up"], col = "red", lty = 2)
library(tseries)
# Augmented Dickey-Fuller (ADF) Test
adf_test <- adf.test(as.numeric(daily_returns[, "AAPL"]))
print(adf_test)
# KPSS Test
kpss_test <- kpss.test(as.numeric(daily_returns[, "AAPL"]))
print(kpss_test)
# Interpretation:
# ADF: H0 = unit root (non-stationary)
# If p-value < 0.05: Reject H0 → Series is stationary
# KPSS: H0 = stationary
# If p-value > 0.05: Fail to reject H0 → Series is stationary
# ACF plot
acf(daily_returns[, "AAPL"], main = "ACF - AAPL Daily Returns")
# PACF plot
pacf(daily_returns[, "AAPL"], main = "PACF - AAPL Daily Returns")
# Ljung-Box test for autocorrelation
Box.test(daily_returns[, "AAPL"], lag = 10, type = "Ljung-Box")
# Simulate future returns using Monte Carlo
simulate_portfolio_returns <- function(
returns_data,
weights = c(0.5, 0.3, 0.2),
days_ahead = 30,
simulations = 10000) {
# Calculate mean and covariance
mean_returns <- colMeans(returns_data)
cov_matrix <- cov(returns_data)
# Cholesky decomposition for correlated returns
L <- t(chol(cov_matrix))
# Random matrix
Z <- matrix(rnorm(nrow(cov_matrix) * days_ahead * simulations),
nrow = nrow(cov_matrix))
# Simulate returns
sim_returns <- array(0, dim = c(days_ahead, length(weights), simulations))
for (i in 1:simulations) {
daily_sim <- mean_returns + L %*% rnorm(3)
port_return <- sum(weights * daily_sim)
sim_returns[, 1, i] <- cumsum(port_return)
}
return(sim_returns)
}
# Run simulation
mc_results <- simulate_portfolio_returns(
daily_returns[, c("AAPL", "MSFT", "GOOGL")],
days_ahead = 30,
simulations = 10000
)
# Calculate percentiles
quantiles <- apply(mc_results[, 1, ], 1, quantile, probs = c(0.05, 0.25, 0.5, 0.75, 0.95))
plot(quantiles[3, ], type = "l", main = "30-Day Portfolio Return Simulation")
library(CausalImpact)
# Define pre and post periods
pre_period <- c(as.Date("2020-01-01"), as.Date("2022-12-31"))
post_period <- c(as.Date("2023-01-01"), as.Date("2024-12-31"))
# Create data for causal impact
impact_data <- zoo(
daily_returns[, c("AAPL", "MSFT")],
order.by = index(daily_returns)
)
# Run causal impact analysis
impact <- CausalImpact(impact_data,
pre.period = pre_period,
post.period = post_period)
# Summarize results
summary(impact)
plot(impact)
# Arrange multiple plots
par(mfrow = c(2, 3))
# 1. Price chart
plot(prices_xts[, "AAPL"], main = "AAPL Price", ylab = "Price ($)")
# 2. Daily returns distribution
hist(daily_returns[, "AAPL"], main = "Daily Returns Distribution", xlab = "Return (%)")
# 3. Correlation heatmap
corrplot(cor(daily_returns[, c("AAPL", "MSFT", "GOOGL")]))
# 4. Cumulative returns
plot(cumprod(1 + daily_returns[, c("AAPL", "MSFT", "GOOGL")]),
main = "Cumulative Returns")
# 5. Rolling volatility
rolling_vol <- zoo::rollapply(daily_returns[, "AAPL"], 30, sd) * sqrt(252)
plot(rolling_vol, main = "30-Day Rolling Volatility", ylab = "Annualized Vol")
# 6. Beta scatter plot
plot(daily_returns[, "GSPC"], daily_returns[, "AAPL"],
main = "Market vs AAPL Returns",
xlab = "Market Return", ylab = "AAPL Return")
abline(lm(daily_returns[, "AAPL"] ~ daily_returns[, "GSPC"]), col = "red")
par(mfrow = c(1, 1))
# Create summary report
summary_report <- data.frame(
Stock = c("AAPL", "MSFT", "GOOGL"),
Annual_Return = c(0.383, 0.412, 0.397),
Annual_Volatility = c(0.281, 0.261, 0.280),
Sharpe_Ratio = c(1.222, 1.426, 1.275),
Beta = betas,
Max_Drawdown = drawdowns,
Correlation_with_MSFT = c(0.671, 1.000, 0.685),
Correlation_with_GOOGL = c(0.538, 0.685, 1.000)
)
# Write to CSV
write.csv(summary_report, "stock_analysis_summary.csv", row.names = FALSE)
# Export correlation matrix
write.csv(cor_matrix, "correlation_matrix.csv")
# Export returns data
write.csv(as.data.frame(daily_returns), "daily_returns.csv")