# Load libraries
#install.packages('tidyquant')
#install.packages('lubridate')
#install.packages('timetk')
#install.packages('purrr')
#install.packages('xts')
#install.packages('s')
#install.packages('zoo')
#install.packages(c("readr", "dplyr"))
#install.packages("SIT")
#install.packages("PerformanceAnalytics")
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(SIT)
library(readr)
library(dplyr)
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyquant)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(lubridate)
library(timetk)
library(purrr)
library(xts)
library(dplyr)
library(quantmod)
library(zoo)
# Question 1: Import data
symbols <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")

prices <- 
  getSymbols(symbols, src = 'yahoo', from = "2010-01-01", 
             auto.assign = TRUE, warnings = FALSE) %>% 
  map(~Ad(get(.))) %>% 
  reduce(merge) %>%
  `colnames<-`(symbols)

head(prices)
##                 SPY      QQQ      EEM      IWM      EFA      TLT      IYR
## 2010-01-04 88.11792 41.00546 32.42902 53.08979 37.99448 63.06110 28.77182
## 2010-01-05 88.35119 41.00546 32.66440 52.90724 38.02797 63.46839 28.84092
## 2010-01-06 88.41335 40.75813 32.73274 52.85745 38.18871 62.61879 28.82835
## 2010-01-07 88.78658 40.78462 32.54292 53.24745 38.04137 62.72411 29.08586
## 2010-01-08 89.08206 41.12029 32.80107 53.53786 38.34275 62.69598 28.89116
## 2010-01-11 89.20647 40.95248 32.73274 53.32214 38.65753 62.35199 29.02933
##               GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
# Question 2: Calculate weekly and monthly returns using log returns
weekly_returns <- apply(prices, 2, function(x) diff(log(x)))
monthly_returns <- apply(prices, 2, function(x) apply.monthly(log(x), Return.cumulative))

# Question 3: Convert monthly returns into tibble format
monthly_returns_tbl <- as_tibble(monthly_returns, rownames = 'date')
monthly_returns_tbl$date <- as.Date(monthly_returns_tbl$date, format = "%Y-%m-%d")
# Question 4: Download Fama French 3 factors data and change to digit numbers
library(readr)
library(dplyr)

ff_data_url <- "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_5_Factors_2x3_CSV.zip"
temp_zip <- tempfile()
download.file(ff_data_url, temp_zip)
unzip(temp_zip, exdir = tempdir())
ff_data_file <- file.path(tempdir(), "F-F_Research_Data_5_Factors_2x3.csv")

# Read the CSV file and skip the first three rows
ff_data <- read_csv(ff_data_file, skip = 3)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 779 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): ...1, Mkt-RF, SMB, HML, RMW, CMA, RF
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename the Date column in ff_data
colnames(ff_data)[1] <- "Date"

# Convert "Date" column in ff_data to Date type
ff_data$Date <- as.Date(as.character(ff_data$Date), format = "%Y-%m-%d")

# Rename the Date column in monthly_returns_tbl
colnames(monthly_returns_tbl)[colnames(monthly_returns_tbl) == "date"] <- "Date"

# Convert "Date" column in monthly_returns_tbl to Date type
monthly_returns_tbl$Date <- as.Date(monthly_returns_tbl$Date, format = "%Y-%m-%d")

# Question 5: Merge monthly return data in question 3 and 4 into tibble format
merged_data <- left_join(monthly_returns_tbl, ff_data, by = "Date")
## Warning in left_join(monthly_returns_tbl, ff_data, by = "Date"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 1 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
# Question 6: Compute covariance matrix for the 8-asset portfolio based on CAPM model
start_date <- as.Date("2010-02-01")
end_date <- as.Date("2015-01-01")
portfolio_returns <- monthly_returns_tbl[start_date:end_date, ]
str(portfolio_returns)
## tibble [1,796 × 9] (S3: tbl_df/tbl/data.frame)
##  $ Date: Date[1:1796], format: NA NA ...
##  $ SPY : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ QQQ : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ EEM : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ IWM : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ EFA : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ TLT : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ IYR : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ GLD : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
# Convert columns to numeric if needed
portfolio_returns <- portfolio_returns %>%
  mutate(
    SPY = as.numeric(SPY),
    QQQ = as.numeric(QQQ),
    # Repeat this for all columns containing returns
  )

# Check the structure again to ensure the columns are now numeric
str(portfolio_returns)
## tibble [1,796 × 9] (S3: tbl_df/tbl/data.frame)
##  $ Date: Date[1:1796], format: NA NA ...
##  $ SPY : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ QQQ : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ EEM : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ IWM : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ EFA : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ TLT : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ IYR : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
##  $ GLD : num [1:1796] NA NA NA NA NA NA NA NA NA NA ...
# Remove rows with missing values
portfolio_returns <- portfolio_returns[complete.cases(portfolio_returns), ]

# Calculate the covariance matrix
cov_matrix_capm <- cov(portfolio_returns)