#1 Load libraries
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8 ✔ TTR 0.24.4
## ✔ quantmod 0.4.27 ✔ xts 0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(timetk)
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:xts':
##
## first, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyr)
#2 Load required libraries
library(tidyquant)
library(timetk)
library(dplyr)
# Define tickers
tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
# Step 1: Download daily adjusted prices from Yahoo Finance
etf_prices <- tq_get(tickers,
from = "2010-01-01",
to = Sys.Date(),
get = "stock.prices") %>%
select(symbol, date, adjusted)
# Step 2: Calculate weekly discrete returns
weekly_returns <- etf_prices %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "weekly",
type = "arithmetic", # discrete return
col_rename = "weekly_return")
# Step 3: Calculate monthly discrete returns
monthly_returns <- etf_prices %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "monthly",
type = "arithmetic",
col_rename = "monthly_return")
# Step 4: Preview results
print("Weekly Returns:")
## [1] "Weekly Returns:"
print(head(weekly_returns))
## # A tibble: 6 × 3
## # Groups: symbol [1]
## symbol date weekly_return
## <chr> <date> <dbl>
## 1 SPY 2010-01-08 0.0109
## 2 SPY 2010-01-15 -0.00812
## 3 SPY 2010-01-22 -0.0390
## 4 SPY 2010-01-29 -0.0167
## 5 SPY 2010-02-05 -0.00680
## 6 SPY 2010-02-12 0.0129
print("Monthly Returns:")
## [1] "Monthly Returns:"
print(head(monthly_returns))
## # A tibble: 6 × 3
## # Groups: symbol [1]
## symbol date monthly_return
## <chr> <date> <dbl>
## 1 SPY 2010-01-29 -0.0524
## 2 SPY 2010-02-26 0.0312
## 3 SPY 2010-03-31 0.0609
## 4 SPY 2010-04-30 0.0155
## 5 SPY 2010-05-28 -0.0795
## 6 SPY 2010-06-30 -0.0517
#3 Load necessary libraries
library(readr)
library(dplyr)
library(lubridate)
# Step 1: Read the Fama French 3-factor data CSV
# Replace with your actual file path if needed
ff_data_raw <- read_csv("F-F_Research_Data_Factors.CSV", skip = 3)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 1283 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ...1
## dbl (4): Mkt-RF, SMB, HML, RF
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Step 2: Rename columns
colnames(ff_data_raw)[1:4] <- c("date", "Mkt_RF", "SMB", "HML")
# Step 3: Remove rows after "Annual Factors:" (non-data rows)
ff_data <- ff_data_raw %>%
filter(!is.na(as.numeric(date))) %>%
mutate(date = ymd(paste0(date, "01"))) # Convert YYYYMM to proper date
## Warning: There was 1 warning in `filter()`.
## ℹ In argument: `!is.na(as.numeric(date))`.
## Caused by warning:
## ! NAs introduced by coercion
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `date = ymd(paste0(date, "01"))`.
## Caused by warning:
## ! 86 failed to parse.
# Step 4: Convert percentages to decimals
ff_data <- ff_data %>%
mutate(across(c(Mkt_RF, SMB, HML), ~ . / 100))
# Step 5: Keep only data from 2010 onward
ff_data <- ff_data %>%
filter(date >= as.Date("2010-01-01"))
# Step 6: Preview cleaned data
print(head(ff_data))
## # A tibble: 6 × 5
## date Mkt_RF SMB HML RF
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-01 -0.0336 0.004 0.0043 0
## 2 2010-02-01 0.034 0.0119 0.0322 0
## 3 2010-03-01 0.0631 0.0148 0.0221 0.01
## 4 2010-04-01 0.02 0.0487 0.0289 0.01
## 5 2010-05-01 -0.0789 0.0009 -0.0244 0.01
## 6 2010-06-01 -0.0557 -0.0181 -0.047 0.01
#4 Load necessary libraries
library(tidyquant)
library(timetk)
library(dplyr)
library(readr)
library(lubridate)
# ----------------------------
# Step 1: Download ETF prices
# ----------------------------
tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
etf_prices <- tq_get(tickers,
from = "2010-01-01",
to = Sys.Date(),
get = "stock.prices") %>%
select(symbol, date, adjusted)
# Step 2: Calculate monthly returns (discrete)
monthly_returns <- etf_prices %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "monthly",
type = "arithmetic",
col_rename = "monthly_return")
# Step 3: Convert to wide format (tickers as columns)
monthly_returns_wide <- monthly_returns %>%
pivot_wider(names_from = symbol, values_from = monthly_return)
# ----------------------------
# Step 4: Load Fama-French data
# ----------------------------
ff_data_raw <- read_csv("F-F_Research_Data_Factors.CSV", skip = 3)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 1283 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ...1
## dbl (4): Mkt-RF, SMB, HML, RF
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename columns
colnames(ff_data_raw)[1:4] <- c("date", "Mkt_RF", "SMB", "HML")
# Filter and clean
ff_data <- ff_data_raw %>%
filter(!is.na(as.numeric(date))) %>%
mutate(date = ymd(paste0(date, "01"))) %>% # convert YYYYMM to Date
mutate(across(c(Mkt_RF, SMB, HML), ~ . / 100)) %>% # convert % to decimal
filter(date >= as.Date("2010-01-01"))
## Warning: There was 1 warning in `filter()`.
## ℹ In argument: `!is.na(as.numeric(date))`.
## Caused by warning:
## ! NAs introduced by coercion
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `date = ymd(paste0(date, "01"))`.
## Caused by warning:
## ! 86 failed to parse.
# ----------------------------
# Step 5: Merge ETF returns and FF data
# ----------------------------
merged_data <- left_join(monthly_returns_wide, ff_data, by = "date")
# Preview merged data
print(head(merged_data))
## # A tibble: 6 × 13
## date SPY QQQ EEM IWM EFA TLT IYR GLD
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-29 -0.0524 -0.0782 -0.104 -0.0605 -0.0749 0.0278 -0.0520 -0.0350
## 2 2010-02-26 0.0312 0.0460 0.0178 0.0448 0.00267 -0.00342 0.0546 0.0327
## 3 2010-03-31 0.0609 0.0771 0.0811 0.0823 0.0639 -0.0206 0.0975 -0.00439
## 4 2010-04-30 0.0155 0.0224 -0.00166 0.0568 -0.0280 0.0332 0.0639 0.0588
## 5 2010-05-28 -0.0795 -0.0739 -0.0939 -0.0754 -0.112 0.0511 -0.0568 0.0305
## 6 2010-06-30 -0.0517 -0.0598 -0.0140 -0.0774 -0.0206 0.0580 -0.0467 0.0236
## # ℹ 4 more variables: Mkt_RF <dbl>, SMB <dbl>, HML <dbl>, RF <dbl>
# --- Step 1: Load Libraries ---
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks xts::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks xts::last()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(quadprog)
# Install required packages if not already installed
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("lubridate")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("xts")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
# Load libraries
library(dplyr)
library(lubridate)
library(tidyverse)
library(xts)
# --- Step 2: Check File Existence ---
if (!file.exists("F-F_Research_Data_Factors.csv")) {
stop("File 'F-F_Research_Data_Factors.csv' not found! Please check the file path.")
}
# --- Step 3: Load Fama-French Data ---
ff_data_raw <- read_csv("F-F_Research_Data_Factors.csv", skip = 3, show_col_types = FALSE) %>%
rename(date_str = `...1`) %>%
filter(!is.na(date_str), grepl("^\\d{6}$", date_str)) %>%
mutate(
date = ymd(paste0(date_str, "01")),
Mkt_RF = `Mkt-RF` / 100,
SMB = SMB / 100,
HML = HML / 100,
RF = RF / 100
) %>%
select(date, Mkt_RF, SMB, HML, RF)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)