This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(tidyquant)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(lubridate)
library(timetk)
library(purrr)
library(timetk)
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tseries)
library(quantmod)
# ETF symbols whose price history is analysed below
tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
# Fetch price history from 2010-01-01 up to today; tidyquant delegates the
# download to quantmod
prices <- tq_get(
  x = tickers,
  get = "stock.prices",
  from = "2010-01-01",
  to = Sys.Date(),
  adjust = TRUE
)
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `data.. = purrr::map(...)`.
## Caused by warning:
## ! x = 'IWM', get = 'stock.prices': Error in getSymbols.yahoo(Symbols = "IWM", env = <environment>, verbose = FALSE, : Unable to import "IWM".
## Timeout was reached: [query2.finance.yahoo.com] Connection timed out after 10001 milliseconds
## Removing IWM.
# Preview the first rows of the downloaded prices
head(prices)
## # A tibble: 6 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SPY 2010-01-04 86.1 86.9 85.5 86.9 118944600 86.9
## 2 SPY 2010-01-05 86.8 87.1 86.5 87.1 111579900 87.1
## 3 SPY 2010-01-06 87.0 87.4 86.9 87.2 116074400 87.2
## 4 SPY 2010-01-07 87.0 87.6 86.7 87.5 131091100 87.5
## 5 SPY 2010-01-08 87.3 87.8 87.1 87.8 126402800 87.8
## 6 SPY 2010-01-11 88.2 88.2 87.6 87.9 106375700 87.9
# Turn each symbol's adjusted price series into monthly period returns.
# Grouping by symbol makes tq_transmute() apply periodReturn() per ticker.
monthly_returns <- tq_transmute(
  group_by(prices, symbol),
  select = adjusted,
  mutate_fun = periodReturn,
  period = "monthly",
  col_rename = "monthly_returns"
)
# Preview the first rows of the monthly returns to check the result
head(monthly_returns)
## # A tibble: 6 × 3
## # Groups: symbol [1]
## symbol date monthly_returns
## <chr> <date> <dbl>
## 1 SPY 2010-01-29 -0.0524
## 2 SPY 2010-02-26 0.0312
## 3 SPY 2010-03-31 0.0609
## 4 SPY 2010-04-30 0.0155
## 5 SPY 2010-05-28 -0.0795
## 6 SPY 2010-06-30 -0.0517
# Import the Fama-French research factors.
# The CSV is not a clean table: a header-less read picks up descriptive text
# lines and the header row as data, and the file may contain further
# non-monthly rows. Read every field as text exactly once, then keep only the
# rows whose Date field is exactly six digits (YYYYMM), i.e. the monthly
# observations. This avoids the NA rows and the "NAs introduced by coercion"
# warnings that a blanket as.numeric() conversion produces.
ff_cols <- c("Date", "Mkt.RF", "SMB", "HML", "RF")
ff_raw <- read.csv(
  "F-F_Research_Data_Factors.CSV",
  header = FALSE,
  col.names = ff_cols,
  colClasses = "character",
  strip.white = TRUE
)
ff_data <- ff_raw[grepl("^[0-9]{6}$", ff_raw$Date), ]
rownames(ff_data) <- NULL
# Convert the factor columns to numeric and divide by 100
# (values are presumably quoted in percent -- confirm against the data source)
ff_factor_cols <- setdiff(ff_cols, "Date")
ff_data[ff_factor_cols] <- lapply(
  ff_data[ff_factor_cols],
  function(x) as.numeric(x) / 100
)
# Check the structure of the dataframe
str(ff_data)
# Look at the first few rows of the dataframe
head(ff_data)
# Ensure the date column is in Date format if not already
monthly_returns$date <- as.Date(monthly_returns$date, format = "%Y-%m-%d")
# Filter data between March 2019 and February 2024 (both months included).
# The upper bound is exclusive on the first day of the following month:
# 2024 is a leap year, so the February 2024 month-end observation is dated
# 2024-02-29 and a "<= 2024-02-28" bound would silently drop that month.
historical_returns <- monthly_returns %>%
  filter(
    date >= as.Date("2019-03-01"),
    date < as.Date("2024-03-01")
  )
# Check the structure of your data frame to see column types
str(historical_returns)
## gropd_df [413 × 3] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ symbol : chr [1:413] "SPY" "SPY" "SPY" "SPY" ...
## $ date : Date[1:413], format: "2019-03-29" "2019-04-30" ...
## $ monthly_returns: num [1:413] 0.0181 0.0409 -0.0638 0.0696 0.0151 ...
## - attr(*, "groups")= tibble [7 × 2] (S3: tbl_df/tbl/data.frame)
## ..$ symbol: chr [1:7] "EEM" "EFA" "GLD" "IYR" ...
## ..$ .rows : list<int> [1:7]
## .. ..$ : int [1:59] 119 120 121 122 123 124 125 126 127 128 ...
## .. ..$ : int [1:59] 178 179 180 181 182 183 184 185 186 187 ...
## .. ..$ : int [1:59] 355 356 357 358 359 360 361 362 363 364 ...
## .. ..$ : int [1:59] 296 297 298 299 300 301 302 303 304 305 ...
## .. ..$ : int [1:59] 60 61 62 63 64 65 66 67 68 69 ...
## .. ..$ : int [1:59] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..$ : int [1:59] 237 238 239 240 241 242 243 244 245 246 ...
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
# Reshape the long (symbol, date, monthly_returns) table into a numeric matrix
# with one row per month and one column per symbol.
# Coercing every column with as.numeric() would turn the symbol and date
# columns into NA, and cov() on the long table would not give the
# cross-asset covariance, so the reshape is done explicitly instead.
# Rows are keyed by year-month so that symbols align even if their month-end
# dates differ; each cell holds a single return, so mean() just returns it,
# and symbol/month combinations with no data become NA.
returns_wide <- tapply(
  historical_returns$monthly_returns,
  list(
    month = format(historical_returns$date, "%Y-%m"),
    symbol = historical_returns$symbol
  ),
  FUN = mean
)
# Check the reshaped returns matrix
str(returns_wide)
# Calculate the covariance matrix of monthly returns across symbols,
# using only the months for which every symbol has a return
cov_matrix <- cov(returns_wide, use = "complete.obs")
# Print the covariance matrix
print(cov_matrix)
# Convert all columns in ff_data that should be numeric
ff_data <- ff_data %>%
  mutate(across(.cols = 1:3, as.numeric)) # Convert the first three columns to numeric
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(.cols = 1:3, as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
# Covariance of the three factors. Columns are selected by name because the
# first column of ff_data is Date, which must not enter the covariance.
ff_factor_names <- c("Mkt.RF", "SMB", "HML")
# Keep only monthly observations: rows whose Date is a six-digit YYYYMM value.
# This drops text/NA rows and any rows with a different date format.
ff_is_monthly <- grepl("^[0-9]{6}$", trimws(as.character(ff_data$Date)))
if (!any(ff_is_monthly)) {
  stop("ff_data contains no rows with a six-digit YYYYMM Date", call. = FALSE)
}
# Recalculate the covariance matrix, ignoring rows with missing factor values
cov_FF_factors <- cov(
  ff_data[ff_is_monthly, ff_factor_names],
  use = "complete.obs"
)
# Check if the conversion has resolved the issue
print(cov_FF_factors)
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.