# ============================================================
#
# FINANCIAL MARKETS — EXPLORATORY DATA ANALYSIS PROJECT
#
# Author  : [Your Name]
#
# Date    : 2025
#
# Purpose : Comprehensive EDA of stock market & macro data
#
# ============================================================

# ── 0. INSTALL & LOAD PACKAGES ──────────────────────────────

# Packages this script attaches. Each entry sits on its own line so that the
# trailing comment cannot swallow the entries that follow it.
packages <- c(
  "tidyverse",            # data wrangling + ggplot2
  "lubridate",            # date handling
  "scales",               # axis formatting
  "patchwork",            # combining plots
  "corrplot",             # correlation heatmap
  "ggridges",             # ridge / joy plots
  "zoo",                  # rolling averages
  "moments",              # skewness & kurtosis
  "PerformanceAnalytics", # financial metrics
  "knitr",                # tables
  "kableExtra"            # styled tables
)

# Install only the packages that are not already present, then attach all of
# them. invisible() hides the list of search paths that lapply() returns.
installed <- rownames(installed.packages())
to_install <- setdiff(packages, installed)
if (length(to_install) > 0) {
  install.packages(to_install, dependencies = TRUE)
}
invisible(lapply(packages, library, character.only = TRUE))

# ── 1. SIMULATE REALISTIC FINANCIAL DATA ────────────────────

# (Replace this block with real data from quantmod / Yahoo Finance)

set.seed(42)
# Number of CALENDAR days to scan for weekdays. It is overwritten below with
# the number of trading days actually kept.
n <- 756

dates <- seq.Date(as.Date("2022-01-03"), by = "day", length.out = n)
# Keep only weekdays (Mon–Fri). "%u" is the ISO weekday number (1 = Monday,
# 7 = Sunday); unlike weekdays() it does not depend on the session locale.
dates <- dates[!format(dates, "%u") %in% c("6", "7")]
# Cap at 504 sessions (about two years at ~252 trading days per year).
# head() never pads with NA, which `[1:504]` would do on a shorter vector.
dates <- head(dates, 504)
n <- length(dates)

# Simulate correlated stock returns via Cholesky decomposition

# Target correlation matrix. Rows and columns follow the order of `tickers`
# defined below. The matrix is symmetric, so byrow = TRUE only makes the
# layout here match the layout in memory.
sigma <- matrix(
  c(
     1.00,  0.75,  0.55,  0.40, -0.35,
     0.75,  1.00,  0.60,  0.35, -0.30,
     0.55,  0.60,  1.00,  0.25, -0.20,
     0.40,  0.35,  0.25,  1.00, -0.50,
    -0.35, -0.30, -0.20, -0.50,  1.00
  ),
  nrow = 5,
  byrow = TRUE
)

# Daily drift and daily volatility shared by every simulated asset.
mu_daily <- 0.0004
sd_daily <- 0.012

# chol() returns the upper-triangular factor R with t(R) %*% R == sigma, so
# right-multiplying independent columns by R gives them correlation sigma.
chol_sigma <- chol(sigma)
raw <- matrix(rnorm(n * 5, mean = mu_daily, sd = sd_daily), nrow = n)
# Correlate only the zero-mean shocks and add the drift back afterwards.
# Multiplying `raw` directly would scale each asset's mean by the column sum
# of the Cholesky factor, so the assets would not share the intended drift.
corr_ret <- (raw - mu_daily) %*% chol_sigma + mu_daily

tickers <- c("AAPL", "MSFT", "GOOGL", "JPM", "GLD")
colnames(corr_ret) <- tickers

# Cumulative prices from returns

# Starting price per asset, in the same order as the columns of corr_ret.
start_prices <- c(150, 280, 92, 130, 170)
stopifnot(length(start_prices) == ncol(corr_ret))

# Compound the returns down each column, then scale column j by
# start_prices[j]. Multiplying inside apply() would instead recycle the five
# start prices down the ROWS of every column and mix the price levels.
prices <- sweep(apply(1 + corr_ret, 2, cumprod), 2, start_prices, `*`)

# Build long-format data frames

# One row per (Date, Ticker) with the simulated daily return.
df_returns <- as_tibble(corr_ret) %>%
  mutate(Date = dates) %>%
  pivot_longer(-Date, names_to = "Ticker", values_to = "Return")

# One row per (Date, Ticker) with the simulated price level.
df_prices <- as_tibble(prices) %>%
  mutate(Date = dates) %>%
  pivot_longer(-Date, names_to = "Ticker", values_to = "Price")

# Macro indicators (simulated)

# Each series is a cumulative sum of normal increments (a random walk) with a
# starting level added; pmax() applies a floor (0 for the two rates, 10 for VIX).
df_macro <- tibble(
  Date      = dates,
  Fed_Rate  = pmax(0, cumsum(rnorm(n, mean = 0.001, sd = 0.005)) + 0.5),
  Inflation = pmax(0, cumsum(rnorm(n, mean = 0.0008, sd = 0.004)) + 2.0),
  VIX       = pmax(10, 20 + cumsum(rnorm(n, mean = 0, sd = 0.3)))
)

# Console status report. Every line ends in "\n" so the four messages do not
# run together on a single output line.
cat("\n✅ Data created successfully\n")
cat(sprintf("   Trading days : %d\n", n))
cat(sprintf("   Tickers      : %s\n", paste(tickers, collapse = ", ")))
cat(sprintf(
  "   Date range   : %s → %s\n",
  format(min(dates)), format(max(dates))
))

# ── 2. DESCRIPTIVE STATISTICS ────────────────────────────────

cat("\n── DESCRIPTIVE STATISTICS ──────────────────────────────\n")

# Per-ticker return statistics. Annualisation uses 252 periods per year.
# Sharpe is annualised mean return over annualised volatility, with no
# risk-free rate subtracted. The table is stored at full precision because
# later sections reuse it; rounding is applied only to the printed copy.
summary_stats <- df_returns %>%
  group_by(Ticker) %>%
  summarise(
    N          = n(),
    Mean_Ret   = mean(Return),
    Median_Ret = median(Return),
    SD         = sd(Return),
    Skewness   = moments::skewness(Return),
    Kurtosis   = moments::kurtosis(Return),
    Min        = min(Return),
    Max        = max(Return),
    Ann_Ret    = prod(1 + Return)^(252 / n()) - 1,
    Ann_Vol    = sd(Return) * sqrt(252),
    Sharpe     = (mean(Return) * 252) / (sd(Return) * sqrt(252)),
    .groups    = "drop"
  )

print(kable(
  summary_stats %>% mutate(across(where(is.numeric), ~ round(.x, 4))),
  format = "simple",
  caption = "Return Statistics by Ticker"
))

# ── 3. COLOUR PALETTE & THEME ────────────────────────────────

# Named colour vector, one hex colour per ticker. The names let
# scale_*_manual(values = pal) match colours to tickers by name.
pal <- c(
  AAPL  = "#0A84FF",
  MSFT  = "#30D158",
  GOOGL = "#FFD60A",
  JPM   = "#FF453A",
  GLD   = "#FF9F0A"
)

# Dark plot theme shared by every chart in this script.
#
# Takes no arguments. Returns theme_minimal(base_size = 12) with the
# background, grid, text, legend, strip and caption colours overridden below.
theme_finance <- function() {
  theme_minimal(base_size = 12) +
    theme(
      plot.background   = element_rect(fill = "#0D1117", color = NA),
      panel.background  = element_rect(fill = "#161B22", color = NA),
      panel.grid.major  = element_line(color = "#21262D"),
      panel.grid.minor  = element_blank(),
      text              = element_text(color = "#E6EDF3"),
      axis.text         = element_text(color = "#8B949E"),
      axis.title        = element_text(color = "#C9D1D9"),
      plot.title        = element_text(color = "#F0F6FC", face = "bold",
                                       size = 14),
      plot.subtitle     = element_text(color = "#8B949E", size = 10),
      legend.background = element_rect(fill = "#161B22", color = NA),
      legend.key        = element_rect(fill = NA),
      strip.text        = element_text(color = "#C9D1D9", face = "bold"),
      plot.caption      = element_text(color = "#484F58", size = 8)
    )
}

# ── 4. PLOT 1 — Normalised Price Performance ─────────────────

# Rebase every ticker to 100 at its first observation so the lines are
# directly comparable. Rows are sorted by Date first so that "first" really is
# the earliest price. dplyr::first() is namespaced because attaching
# PerformanceAnalytics also attaches xts, which exports its own first().
p1 <- df_prices %>%
  group_by(Ticker) %>%
  arrange(Date) %>%
  mutate(Indexed = Price / dplyr::first(Price) * 100) %>%
  ungroup() %>%
  ggplot(aes(Date, Indexed, color = Ticker)) +
  geom_line(linewidth = 0.7, alpha = 0.9) +
  geom_hline(yintercept = 100, linetype = "dashed", color = "#484F58") +
  scale_color_manual(values = pal) +
  scale_y_continuous(labels = label_number(suffix = "")) +
  scale_x_date(date_breaks = "3 months", date_labels = "%b '%y") +
  labs(
    title    = "Normalised Price Performance (Base = 100)",
    subtitle = "All tickers rebased to 100 at start of period",
    x        = NULL,
    y        = "Indexed Price",
    color    = NULL,
    caption  = "Source: Simulated data for illustration"
  ) +
  theme_finance()

print(p1)
ggsave(
  "plot_01_price_performance.png", p1,
  width = 11, height = 5.5, dpi = 180, bg = "#0D1117"
)
cat("📊 Saved: plot_01_price_performance.png\n")

# ── 5. PLOT 2 — Daily Return Distribution (Ridge Plot) ───────

# One density ridge per ticker; fct_reorder() orders the ridges by the
# standard deviation of each ticker's returns (descending).
p2 <- df_returns %>%
  ggplot(aes(
    x    = Return,
    y    = fct_reorder(Ticker, Return, sd, .desc = TRUE),
    fill = Ticker
  )) +
  geom_density_ridges(
    alpha = 0.75, scale = 1.4, bandwidth = 0.002, color = "#0D1117"
  ) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "#8B949E") +
  scale_fill_manual(values = pal, guide = "none") +
  scale_x_continuous(labels = label_percent(accuracy = 0.1)) +
  labs(
    title    = "Daily Return Distributions by Ticker",
    subtitle = "Ridge density plot — wider spread = higher volatility",
    x        = "Daily Return (%)",
    y        = NULL,
    caption  = "Source: Simulated data"
  ) +
  theme_finance()

print(p2)
ggsave(
  "plot_02_return_distributions.png", p2,
  width = 10, height = 6, dpi = 180, bg = "#0D1117"
)
cat("📊 Saved: plot_02_return_distributions.png\n")

# ── 6. PLOT 3 — 30-Day Rolling Volatility ────────────────────

# Trailing 30-observation standard deviation per ticker, annualised with
# sqrt(252). align = "right" with fill = NA leaves the first 29 rows of each
# ticker as NA; those rows are dropped before plotting.
p3 <- df_returns %>%
  group_by(Ticker) %>%
  arrange(Date) %>%
  mutate(
    RollVol = rollapply(Return, 30, sd, fill = NA, align = "right") *
      sqrt(252)
  ) %>%
  ungroup() %>%
  filter(!is.na(RollVol)) %>%
  ggplot(aes(Date, RollVol, color = Ticker)) +
  geom_line(linewidth = 0.65, alpha = 0.85) +
  scale_color_manual(values = pal) +
  scale_y_continuous(labels = label_percent(accuracy = 1)) +
  scale_x_date(date_breaks = "3 months", date_labels = "%b '%y") +
  labs(
    title    = "30-Day Rolling Annualised Volatility",
    subtitle = "Higher values indicate greater price uncertainty",
    x        = NULL,
    y        = "Annualised Volatility",
    color    = NULL,
    caption  = "Source: Simulated data"
  ) +
  theme_finance()

print(p3)
ggsave(
  "plot_03_rolling_volatility.png", p3,
  width = 11, height = 5.5, dpi = 180, bg = "#0D1117"
)
cat("📊 Saved: plot_03_rolling_volatility.png\n")

# ── 7. PLOT 4 — Correlation Heatmap ──────────────────────────

# Back to wide form (one column per ticker) so cor() can compute the pairwise
# correlation matrix of daily returns.
ret_wide <- df_returns %>%
  pivot_wider(names_from = Ticker, values_from = Return) %>%
  select(-Date)

cor_mat <- cor(ret_wide, use = "complete.obs")

# Save as PNG using corrplot. corrplot draws on the active graphics device, so
# the PNG device is opened first; `finally` closes it even if drawing fails,
# so a failed run cannot leave a device open.
png(
  "plot_04_correlation_heatmap.png",
  width = 700, height = 620, bg = "#0D1117", res = 120
)
invisible(tryCatch(
  corrplot(
    cor_mat,
    method      = "color",
    type        = "upper",
    tl.col      = "#C9D1D9",
    tl.cex      = 0.9,
    addCoef.col = "#E6EDF3",
    number.cex  = 0.8,
    col         = colorRampPalette(c("#FF453A", "#161B22", "#0A84FF"))(200),
    bg          = "#0D1117",
    mar         = c(0, 0, 2, 0),
    title       = "Return Correlation Matrix"
  ),
  finally = dev.off()
))
cat("📊 Saved: plot_04_correlation_heatmap.png\n")

# ── 8. PLOT 5 — Risk vs Return Scatter ───────────────────────

# Only the columns the scatter needs: one point per ticker.
risk_return <- summary_stats %>%
  select(Ticker, Ann_Ret, Ann_Vol, Sharpe)

# Point size encodes the Sharpe ratio. The label layer sets its own fixed
# size and colour, so it is not affected by the size/colour mappings.
p5 <- risk_return %>%
  ggplot(aes(Ann_Vol, Ann_Ret, color = Ticker, size = Sharpe)) +
  geom_point(alpha = 0.9) +
  geom_text(
    aes(label = Ticker),
    vjust = -1.3, size = 3.5, fontface = "bold", color = "#E6EDF3"
  ) +
  scale_color_manual(values = pal, guide = "none") +
  scale_size_continuous(range = c(4, 12), name = "Sharpe Ratio") +
  scale_x_continuous(labels = label_percent(accuracy = 0.1)) +
  scale_y_continuous(labels = label_percent(accuracy = 0.1)) +
  labs(
    title    = "Risk vs. Return — Efficient Frontier View",
    subtitle = "Bubble size = Sharpe ratio | Top-left = superior risk-adjusted return",
    x        = "Annualised Volatility (Risk)",
    y        = "Annualised Return",
    caption  = "Source: Simulated data"
  ) +
  theme_finance()

print(p5)
ggsave(
  "plot_05_risk_return.png", p5,
  width = 9, height = 6, dpi = 180, bg = "#0D1117"
)
cat("📊 Saved: plot_05_risk_return.png\n")

# ── 9. PLOT 6 — Macro Indicators Over Time ───────────────────

# Fed_Rate is already in percent units, hence scale = 1 in label_percent().
p_fed <- df_macro %>%
  ggplot(aes(Date, Fed_Rate)) +
  geom_line(color = "#FF453A", linewidth = 0.7) +
  scale_y_continuous(labels = label_percent(accuracy = 0.1, scale = 1)) +
  labs(title = "Fed Funds Rate (%)", x = NULL, y = NULL) +
  theme_finance()

p_inf <- df_macro %>%
  ggplot(aes(Date, Inflation)) +
  geom_area(fill = "#FF9F0A", alpha = 0.3) +
  geom_line(color = "#FF9F0A", linewidth = 0.7) +
  scale_y_continuous(labels = label_number(suffix = "%", accuracy = 0.1)) +
  labs(title = "Inflation Rate (%)", x = NULL, y = NULL) +
  theme_finance()

# Dashed reference line at VIX = 20, labelled just above it near the left
# edge of the panel (10 days after the first date).
p_vix <- df_macro %>%
  ggplot(aes(Date, VIX)) +
  geom_line(color = "#30D158", linewidth = 0.7) +
  geom_hline(yintercept = 20, linetype = "dashed", color = "#484F58") +
  annotate(
    "text",
    x = min(dates) + 10, y = 21,
    label = "Threshold: 20", color = "#484F58", size = 3
  ) +
  labs(title = "VIX (Market Fear Index)", x = NULL, y = NULL) +
  theme_finance()

# Stack the three panels vertically (patchwork `/`). The year range in the
# subtitle is read from `dates` so it always matches the simulated period.
p6 <- (p_fed / p_inf / p_vix) +
  plot_annotation(
    title    = "Macroeconomic Indicators",
    subtitle = sprintf(
      "Fed Rate | Inflation | VIX — %s to %s",
      format(min(dates), "%Y"), format(max(dates), "%Y")
    ),
    theme = theme(
      plot.background = element_rect(fill = "#0D1117", color = NA),
      plot.title      = element_text(color = "#F0F6FC", face = "bold",
                                     size = 14),
      plot.subtitle   = element_text(color = "#8B949E", size = 10)
    )
  )

print(p6)
ggsave(
  "plot_06_macro_indicators.png", p6,
  width = 11, height = 9, dpi = 180, bg = "#0D1117"
)
cat("📊 Saved: plot_06_macro_indicators.png\n")

# ── 10. PLOT 7 — Monthly Return Heatmap ──────────────────────

# Compound the daily returns within each (Ticker, Year, Month) cell.
monthly_ret <- df_returns %>%
  mutate(
    Year  = year(Date),
    Month = month(Date, label = TRUE, abbr = TRUE)
  ) %>%
  group_by(Ticker, Year, Month) %>%
  summarise(MonthlyReturn = prod(1 + Return) - 1, .groups = "drop")

# Calendar-style heatmap for three of the tickers, one facet per ticker.
# The fill scale diverges around 0 so the sign of the month is visible.
p7 <- monthly_ret %>%
  filter(Ticker %in% c("AAPL", "MSFT", "JPM")) %>%
  ggplot(aes(Month, factor(Year), fill = MonthlyReturn)) +
  geom_tile(color = "#0D1117", linewidth = 0.4) +
  geom_text(
    aes(label = sprintf("%.1f%%", MonthlyReturn * 100)),
    size = 2.6, color = "#E6EDF3"
  ) +
  scale_fill_gradient2(
    low      = "#FF453A",
    mid      = "#161B22",
    high     = "#30D158",
    midpoint = 0,
    labels   = label_percent(accuracy = 1),
    name     = "Monthly"
  ) +
  facet_wrap(~Ticker, ncol = 1) +
  labs(
    title    = "Monthly Return Heatmap",
    subtitle = "Green = positive months | Red = negative months",
    x        = NULL,
    y        = NULL,
    caption  = "Source: Simulated data"
  ) +
  theme_finance() +
  theme(axis.text.x = element_text(size = 8))

print(p7)
ggsave(
  "plot_07_monthly_heatmap.png", p7,
  width = 10, height = 8, dpi = 180, bg = "#0D1117"
)
cat("📊 Saved: plot_07_monthly_heatmap.png\n")

# ── 11. PLOT 8 — Drawdown Analysis ───────────────────────────

# Drawdown series for a vector of simple periodic returns.
#
# ret_vec: numeric vector of returns in chronological order (0.01 = +1%).
# Returns a numeric vector of the same length: at each step, the fractional
# distance of cumulative wealth below its running maximum (0 at a new peak,
# negative otherwise).
drawdown_fn <- function(ret_vec) {
  stopifnot(is.numeric(ret_vec))
  wealth <- cumprod(1 + ret_vec) # growth of 1 unit invested at the start
  peak <- cummax(wealth)         # highest wealth reached so far
  (wealth - peak) / peak
}

# Drawdown path per ticker. Rows are sorted by Date before drawdown_fn() runs
# because it compounds returns in the order it receives them.
p8 <- df_returns %>%
  group_by(Ticker) %>%
  arrange(Date) %>%
  mutate(Drawdown = drawdown_fn(Return)) %>%
  ungroup() %>%
  ggplot(aes(Date, Drawdown, color = Ticker)) +
  geom_line(linewidth = 0.65, alpha = 0.85) +
  geom_hline(yintercept = 0, color = "#484F58") +
  scale_color_manual(values = pal) +
  scale_y_continuous(labels = label_percent(accuracy = 1)) +
  scale_x_date(date_breaks = "3 months", date_labels = "%b '%y") +
  labs(
    title    = "Portfolio Drawdown Analysis",
    subtitle = "Percentage decline from each asset's rolling peak",
    x        = NULL,
    y        = "Drawdown (%)",
    color    = NULL,
    caption  = "Source: Simulated data"
  ) +
  theme_finance()

print(p8)
ggsave(
  "plot_08_drawdown.png", p8,
  width = 11, height = 5.5, dpi = 180, bg = "#0D1117"
)
cat("📊 Saved: plot_08_drawdown.png\n")

# ── 12. FINAL SUMMARY REPORT ─────────────────────────────────

# Console report. Every line ends in "\n" so the entries do not run together.
cat("\n")
cat("══════════════════════════════════════════════════════\n")
cat("  FINANCIAL EDA PROJECT — SUMMARY\n")
cat("══════════════════════════════════════════════════════\n")
cat(sprintf(
  "  Period      : %s → %s\n",
  format(min(dates)), format(max(dates))
))
cat(sprintf("  Trading Days: %d\n", n))
cat(sprintf("  Assets      : %s\n", paste(tickers, collapse = ", ")))
cat("\nKEY FINDINGS:\n")

best_ret <- summary_stats$Ticker[which.max(summary_stats$Ann_Ret)]
lowest_vol <- summary_stats$Ticker[which.min(summary_stats$Ann_Vol)]
best_sharpe <- summary_stats$Ticker[which.max(summary_stats$Sharpe)]

# Most correlated pair of DIFFERENT assets. The diagonal is masked with NA
# rather than filtered with `< 1`, which depends on the self-correlations
# being exactly 1 in floating point. The matrix is symmetric, so the first
# hit is taken.
cor_offdiag <- cor_mat
diag(cor_offdiag) <- NA
highest_cor <- which(
  cor_offdiag == max(cor_offdiag, na.rm = TRUE),
  arr.ind = TRUE
)[1, ]

cat(sprintf(
  "  • Best Annualised Return : %s (%.1f%%)\n",
  best_ret, max(summary_stats$Ann_Ret) * 100
))
cat(sprintf(
  "  • Lowest Volatility      : %s (%.1f%% ann.)\n",
  lowest_vol, min(summary_stats$Ann_Vol) * 100
))
cat(sprintf(
  "  • Best Sharpe Ratio      : %s (%.2f)\n",
  best_sharpe, max(summary_stats$Sharpe)
))
cat(sprintf(
  "  • Highest Asset Pair Cor : %s & %s (%.2f)\n",
  rownames(cor_mat)[highest_cor[1]],
  colnames(cor_mat)[highest_cor[2]],
  cor_mat[highest_cor[1], highest_cor[2]]
))

cat("\nOUTPUT FILES:\n")
# File names follow plot_0<k>_<suffix>.png, matching the ggsave()/png() calls.
plot_suffixes <- c(
  "_price_performance", "_return_distributions", "_rolling_volatility",
  "_correlation_heatmap", "_risk_return", "_macro_indicators",
  "_monthly_heatmap", "_drawdown"
)
output_files <- paste0("plot_0", seq_along(plot_suffixes), plot_suffixes, ".png")
cat(sprintf("  ✔ %s\n", output_files), sep = "")
cat("══════════════════════════════════════════════════════\n")