# Load necessary libraries
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(PerformanceAnalytics)
## Loading required package: xts
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
# Define the path to the Excel file
excel_file_path <- "hw5.xlsx"

# Read the data from Excel (assuming the data is in the first sheet)
hw5 <- read_excel(excel_file_path, sheet = 1)

# Inspect the first few rows and structure of the hw5 dataset
head(hw5)
## # A tibble: 6 × 17
##     Date `Mkt-RF`   SMB   HML   RMW   CMA    RF NoDur Durbl Manuf Enrgy HiTec
##    <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 196307    -0.39 -0.41 -0.97  0.68 -1.18  0.27 -0.48 -0.07 -1.39  2.3  -0.68
## 2 196308     5.07 -0.8   1.8   0.36 -0.35  0.25  4.87  6.54  6.19  3.94  5.13
## 3 196309    -1.57 -0.52  0.13 -0.71  0.29  0.27 -1.69 -0.25 -0.78 -3.65  0.14
## 4 196310     2.53 -1.39 -0.1   2.8  -2.01  0.29  2.66 10.7   2.54 -0.33  8.3 
## 5 196311    -0.85 -0.88  1.75 -0.51  2.24  0.27 -1.12 -5.17  0.3  -1.15 -0.29
## 6 196312     1.83 -2.1  -0.02  0.03 -0.07  0.29  2.81  0.6   1.89  4.66  3.04
## # ℹ 5 more variables: Telcm <dbl>, Shops <dbl>, Hlth <dbl>, Utils <dbl>,
## #   Other <dbl>
str(hw5)
## tibble [729 × 17] (S3: tbl_df/tbl/data.frame)
##  $ Date  : num [1:729] 196307 196308 196309 196310 196311 ...
##  $ Mkt-RF: num [1:729] -0.39 5.07 -1.57 2.53 -0.85 1.83 2.24 1.54 1.41 0.1 ...
##  $ SMB   : num [1:729] -0.41 -0.8 -0.52 -1.39 -0.88 -2.1 0.13 0.28 1.23 -1.52 ...
##  $ HML   : num [1:729] -0.97 1.8 0.13 -0.1 1.75 -0.02 1.48 2.81 3.4 -0.67 ...
##  $ RMW   : num [1:729] 0.68 0.36 -0.71 2.8 -0.51 0.03 0.17 -0.05 -2.21 -1.27 ...
##  $ CMA   : num [1:729] -1.18 -0.35 0.29 -2.01 2.24 -0.07 1.47 0.91 3.22 -1.08 ...
##  $ RF    : num [1:729] 0.27 0.25 0.27 0.29 0.27 0.29 0.3 0.26 0.31 0.29 ...
##  $ NoDur : num [1:729] -0.48 4.87 -1.69 2.66 -1.12 2.81 0.8 1.86 3.08 -0.47 ...
##  $ Durbl : num [1:729] -0.07 6.54 -0.25 10.71 -5.17 ...
##  $ Manuf : num [1:729] -1.39 6.19 -0.78 2.54 0.3 1.89 2.71 2.73 3.7 -1.72 ...
##  $ Enrgy : num [1:729] 2.3 3.94 -3.65 -0.33 -1.15 4.66 4.85 1.08 1.4 4.04 ...
##  $ HiTec : num [1:729] -0.68 5.13 0.14 8.3 -0.29 3.04 2.93 1.96 3.38 -3.63 ...
##  $ Telcm : num [1:729] -0.25 4.28 2.36 3.43 4.14 -0.09 3.32 -0.28 -0.9 0.83 ...
##  $ Shops : num [1:729] -1.05 6.42 0.93 0.51 -1.25 0.33 2.52 1.63 2.43 2.88 ...
##  $ Hlth  : num [1:729] 0.57 9.56 -4.07 3.38 -1.65 1.53 3.7 1.65 -1.94 -1.42 ...
##  $ Utils : num [1:729] 0.81 4.2 -2.5 -0.67 -1.02 2.41 1.4 0.87 -0.74 1.05 ...
##  $ Other : num [1:729] -1.59 5.44 -3.18 1.39 0.1 2.66 1.17 5.38 2.34 0.29 ...
# Inspect the Date column to understand its format
print(head(hw5$Date))
## [1] 196307 196308 196309 196310 196311 196312
# Attempt to identify the format of the Date column
# Convert the Date column based on the identified format
# If Date is in "YYYY-MM-DD" format
hw5$Date <- as.Date(hw5$Date, format = "%Y-%m-%d")

# Print the Date column after conversion
print(head(hw5$Date))
## [1] "2507-06-22" "2507-06-23" "2507-06-24" "2507-06-25" "2507-06-26"
## [6] "2507-06-27"
# Check if the Date conversion was successful
if (any(is.na(hw5$Date))) {
  stop("Date conversion failed. Please check the Date format in the Excel file.")
}

# Filter data to start from 1969
hw5 <- hw5 %>% filter(Date >= "1969-01-01")

# Define column names for factors and industry returns
factor_columns <- c("Mkt-RF", "SMB", "HML", "RMW", "CMA", "RF")
industry_columns <- c("NoDur", "Durbl", "Manuf", "Enrgy", "HiTec", "Telcm", "Utils", "Shops", "Hlth", "Other")

# Check if the required columns are present
required_columns <- c("Date", factor_columns, industry_columns)
missing_columns <- setdiff(required_columns, colnames(hw5))
if (length(missing_columns) > 0) {
  stop(paste("Missing columns in the data:", paste(missing_columns, collapse = ", ")))
}

# Extract returns for calculations and ensure they are numeric
returns_data <- hw5[, c(factor_columns, industry_columns)] %>%
  mutate(across(everything(), as.numeric))

# Check for any NA values in the returns data
if (any(is.na(returns_data))) {
  stop("There are NA values in the returns data. Please check the data.")
}

# Calculate the rolling 5-year (60 months) covariance matrix and MVP weights
calculate_mvp_weights <- function(returns) {
  cov_matrix <- cov(returns, use = "pairwise.complete.obs")
  inv_cov_matrix <- try(solve(cov_matrix), silent = TRUE)
  if (inherits(inv_cov_matrix, "try-error")) {
    stop("Covariance matrix is singular and cannot be inverted.")
  }
  ones <- rep(1, ncol(returns))
  mvp_weights <- inv_cov_matrix %*% ones / sum(inv_cov_matrix %*% ones)
  return(mvp_weights)
}

# Calculate the MVP returns using a rolling window of 5 years (60 months)
mvp_returns <- rollapply(
  data = returns_data,
  width = 60,
  FUN = function(x) {
    weights <- calculate_mvp_weights(x)
    portfolio_returns <- x %*% weights
    return(mean(portfolio_returns, na.rm = TRUE))
  },
  by.column = FALSE,
  align = "right"
)

# Create a new data frame to store the MVP returns
mvp_returns_df <- data.frame(
  Date = hw5$Date[60:nrow(hw5)],
  MVP_Returns = mvp_returns
)

# Calculate the cumulative returns
cumulative_returns <- cumprod(1 + mvp_returns_df$MVP_Returns)

# Check for any missing or infinite values in the cumulative returns
if (any(is.na(cumulative_returns)) || any(is.infinite(cumulative_returns))) {
  stop("There are missing or infinite values in the cumulative returns. Please check the calculations.")
}

# Plot the cumulative returns
plot(
  mvp_returns_df$Date, cumulative_returns,
  type = "l",
  main = "Cumulative Returns of MVP Portfolio",
  xlab = "Year",
  ylab = "Cumulative Returns",
  col = "blue"
)