# 1/N Portfolio Construction without Monthly Rebalancing
# This script creates an equally weighted portfolio of all assets
# Initial portfolio weights are set once after the first 36 months
# No rebalancing is performed afterward

# ---- PACKAGE INSTALLATION AND LOADING ----
if (!require("pacman")) install.packages("pacman")
## Cargando paquete requerido: pacman
pacman::p_load(
  tidyverse,    # Data manipulation and visualization
  quantmod,     # Financial data retrieval and analysis
  PerformanceAnalytics, # Performance and risk analysis
  zoo,          # Time series analysis
  roll,         # Rolling window calculations
  xts,          # Extensible time series
  lubridate,    # Date handling
  readxl,       # Excel file reading
  knitr,        # For tables
  grid,         # For graphics
  gridExtra     # For arranging multiple plots
)

# ---- FILE PATH CONFIGURATION ----
# Create file paths using file.path() to handle spaces and special characters better
base_dir <- getwd()  # Use current working directory

# Use forward slashes instead of backslashes
stock_file_path <- file.path(base_dir, "data.xlsx") 
market_file_path <- file.path(base_dir, "data2.xlsx")

# Print the paths for verification
cat("Stock file path:", stock_file_path, "\n")
## Stock file path: C:/Users/lcyep/OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey/Tec/Semestre 6/Risk/R, project/data.xlsx
cat("Market file path:", market_file_path, "\n")
## Market file path: C:/Users/lcyep/OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey/Tec/Semestre 6/Risk/R, project/data2.xlsx
# ---- DATA IMPORT FUNCTION ----
# Function to import Excel data
import_excel_data <- function(stock_file_path, market_file_path) {
  tryCatch({
    # Read stock price data
    cat("Reading stock data from:", stock_file_path, "\n")
    stock_data <- read_excel(stock_file_path)
    
    # Read benchmark and risk-free rate data
    cat("Reading market data from:", market_file_path, "\n")
    market_data <- read_excel(market_file_path)
    
    # Convert the first column to dates
    dates <- as.Date(stock_data[[1]])
    
    # Create xts objects for stock prices
    stock_prices <- as.matrix(stock_data[, -1])  # Remove the date column
    rownames(stock_prices) <- NULL
    stock_prices_xts <- xts(stock_prices, order.by = dates)
    colnames(stock_prices_xts) <- colnames(stock_data)[-1]  # Preserve stock names
    
    # Extract S&P 500 price and T-bill data
    sp500_prices <- as.numeric(market_data[[2]])  # S&P 500 Price Index
    tbill_rates <- as.numeric(market_data[[4]]) / 100 / 12  # Convert annual rate to monthly
    
    market_dates <- as.Date(market_data[[1]])
    sp500_prices_xts <- xts(sp500_prices, order.by = market_dates)
    tbill_rates_xts <- xts(tbill_rates, order.by = market_dates)
    
    # Calculate returns for stocks
    stock_returns_xts <- ROC(stock_prices_xts, type = "discrete", n = 1)
    stock_returns_xts <- stock_returns_xts[-1, ]  # Remove first NA row
    
    # Calculate returns for S&P 500
    sp500_returns_xts <- ROC(sp500_prices_xts, type = "discrete", n = 1)
    sp500_returns_xts <- sp500_returns_xts[-1, ]  # Remove first NA row
    
    # Remove first value from tbill rates to align with returns
    tbill_rates_xts <- tbill_rates_xts[-1, ]
    
    return(list(
      stock_returns = stock_returns_xts,
      sp500_returns = sp500_returns_xts,
      risk_free_rate = tbill_rates_xts
    ))
  }, error = function(e) {
    stop(paste("Error reading files:", e$message))
  })
}

# ---- CONSTRUCT 1/N PORTFOLIO ----
# Function to construct a 1/N portfolio without monthly rebalancing
construct_1n_portfolio <- function(asset_returns, window_size = 36) {
  dates <- index(asset_returns)
  n_periods <- length(dates) - window_size
  portfolio_returns <- numeric(n_periods)
  portfolio_weights <- NULL
  
  cat("Starting 1/N portfolio construction with", n_periods, "investment periods\n")
  
  # Initial investments after the first 36 months
  start_idx <- 1
  end_idx <- window_size
  
  # Select all assets that have returns data
  initial_returns <- asset_returns[start_idx:end_idx, ]
  valid_assets <- !apply(is.na(initial_returns), 2, any)
  
  # Create equal weights for valid assets
  n_assets <- sum(valid_assets)
  initial_weights <- rep(0, ncol(asset_returns))
  initial_weights[valid_assets] <- 1/n_assets
  
  cat("Initial portfolio has", n_assets, "equally weighted assets\n")
  
  # Store the initial weights
  portfolio_weights <- initial_weights
  asset_names <- colnames(asset_returns)
  
  # Create a weights data frame for reference
  weights_df <- data.frame(
    Asset = asset_names,
    Weight = portfolio_weights
  )
  
  # Calculate portfolio returns for each period
  # No rebalancing - simulate holding the initial investments
  for (i in 1:n_periods) {
    # Get returns for the period
    period_returns <- as.numeric(asset_returns[end_idx + i, ])
    
    # Calculate portfolio return
    portfolio_returns[i] <- sum(portfolio_weights * period_returns, na.rm = TRUE)
    
    # Print progress
    if (i %% 20 == 0 || i == 1 || i == n_periods) {
      cat("Processing period", i, "of", n_periods, 
          "- Date:", as.character(dates[end_idx + i]), "\n")
    }
  }
  
  # Create a time series of portfolio returns
  portfolio_returns_ts <- xts(portfolio_returns, order.by = dates[(window_size + 1):length(dates)])
  
  # Save weights to a file for analysis
  output_file <- file.path(base_dir, "1n_portfolio_weights.csv")
  write.csv(weights_df, output_file, row.names = FALSE)
  cat("Saved portfolio weights to:", output_file, "\n")
  
  return(list(
    returns = portfolio_returns_ts,
    weights = weights_df
  ))
}

# ---- PERFORMANCE ANALYSIS FUNCTION ----
# Function to analyze portfolio performance
analyze_portfolio <- function(portfolio_result, risk_free_rate, sp500_returns) {
  portfolio_returns <- portfolio_result$returns
  
  # Calculate cumulative returns
  cumulative_returns <- cumprod(1 + portfolio_returns) - 1
  
  # Plot results
  plot(cumulative_returns, main = "Cumulative Portfolio Returns (1/N Strategy)", 
       ylab = "Return", xlab = "Date")
  
  # Calculate performance metrics
  portfolio_avg_return <- mean(portfolio_returns) * 12 * 100  # Annualized and as percentage
  portfolio_sd <- sd(portfolio_returns) * sqrt(12) * 100      # Annualized and as percentage
  portfolio_sharpe <- mean(portfolio_returns - risk_free_rate) / sd(portfolio_returns)
  
  # Calculate benchmark metrics for the same period
  benchmark_returns <- sp500_returns[(36+1):length(sp500_returns)]
  benchmark_avg_return <- mean(benchmark_returns) * 12 * 100  # Annualized and as percentage
  benchmark_sd <- sd(benchmark_returns) * sqrt(12) * 100      # Annualized and as percentage
  benchmark_sharpe <- mean(benchmark_returns - risk_free_rate) / sd(benchmark_returns)
  
  # Calculate Upside-Potential Ratio for both
  target_return <- mean(risk_free_rate)
  portfolio_upside <- mean(pmax(portfolio_returns - target_return, 0)) / 
                    sqrt(mean((portfolio_returns - mean(portfolio_returns))^2))
  benchmark_upside <- mean(pmax(benchmark_returns - target_return, 0)) / 
                    sqrt(mean((benchmark_returns - mean(benchmark_returns))^2))
  
  # Create performance comparison table
  performance_table <- data.frame(
    Metric = c("Average Return", "Standard Deviation", "Sharpe Ratio", "Upside-Potential Ratio"),
    Portfolio = c(sprintf("%.2f%%", portfolio_avg_return), 
                 sprintf("%.2f%%", portfolio_sd),
                 sprintf("%.3f", portfolio_sharpe),
                 sprintf("%.3f", portfolio_upside)),
    Benchmark = c(sprintf("%.2f%%", benchmark_avg_return),
                 sprintf("%.2f%%", benchmark_sd),
                 sprintf("%.3f", benchmark_sharpe),
                 sprintf("%.3f", benchmark_upside))
  )
  
  cat("\n----- Portfolio Performance Metrics -----\n")
  print(performance_table)
  
  # Save the performance table
  output_file <- file.path(base_dir, "1n_performance_comparison.csv")
  write.csv(performance_table, output_file, row.names = FALSE)
  cat("Saved performance comparison to:", output_file, "\n")
  
  # Save portfolio returns to CSV
  returns_df <- data.frame(
    Date = index(portfolio_returns),
    Return = as.numeric(portfolio_returns)
  )
  output_file <- file.path(base_dir, "1n_portfolio_returns.csv")
  write.csv(returns_df, output_file, row.names = FALSE)
  cat("Saved portfolio returns to:", output_file, "\n")
  
  # Create a nice performance comparison table for display
  create_performance_table <- function() {
    # Use actually calculated metrics
    metrics_table <- data.frame(
      Metric = c("Average Return", "Standard Deviation", "Sharpe Ratio", "Upside-Potential Ratio"),
      Portfolio = c(sprintf("%.2f%%", portfolio_avg_return), 
                   sprintf("%.2f%%", portfolio_sd),
                   sprintf("%.3f", portfolio_sharpe),
                   sprintf("%.3f", portfolio_upside)),
      Benchmark = c(sprintf("%.2f%%", benchmark_avg_return),
                   sprintf("%.2f%%", benchmark_sd),
                   sprintf("%.3f", benchmark_sharpe),
                   sprintf("%.3f", benchmark_upside))
    )
    
    colnames(metrics_table) <- c("Metric", "1/N\nPortfolio", "S&P 500\nBenchmark")
    
    # Create a nice table with the metrics - improved visibility
    grid.newpage()
    tt <- ttheme_default(
      core = list(fg_params=list(fontface=1, fontsize=14, col="black"),
                  bg_params=list(fill=c("whitesmoke", "white"))),
      colhead = list(fg_params=list(fontface=2, fontsize=16, col="black"),
                     bg_params=list(fill="lightblue")),
      rowhead = list(fg_params=list(fontface=1, fontsize=14, col="black"))
    )
    grid.table(metrics_table, rows = NULL, theme = tt)
    
    # Save the table as an image with improved settings
    output_file <- file.path(base_dir, "1n_performance_table.png")
    png(output_file, width = 1000, height = 500, res = 120)
    grid.newpage()
    grid.table(metrics_table, rows = NULL, theme = tt)
    dev.off()
    cat("Saved performance table image to:", output_file, "\n")
    
    # Also save as CSV for backup
    write.csv(metrics_table, file.path(base_dir, "1n_performance_metrics.csv"), row.names = FALSE)
  }
  
  # Create the table
  create_performance_table()
  
  return(list(
    cumulative_returns = cumulative_returns,
    performance_table = performance_table
  ))
}

# ---- MAIN EXECUTION ----
# Run the complete analysis
run_analysis <- function() {
  # Start timer
  start_time <- Sys.time()
  cat("Starting 1/N portfolio analysis at", as.character(start_time), "\n")
  
  # Import data - with error handling
  cat("Importing data from Excel files...\n")
  tryCatch({
    data <- import_excel_data(stock_file_path, market_file_path)
  }, error = function(e) {
    cat("ERROR: Could not read data files. Please check the paths and file contents.\n")
    cat("Error details:", e$message, "\n")
    cat("Current working directory:", getwd(), "\n")
    cat("Looking for files at:\n")
    cat("  ", stock_file_path, "\n")
    cat("  ", market_file_path, "\n")
    cat("Files exist check:\n")
    cat("  Stock file exists:", file.exists(stock_file_path), "\n")
    cat("  Market file exists:", file.exists(market_file_path), "\n")
    stop("Data import failed - see details above")
  })
  
  # Basic data exploration with validation
  cat("\nData summary:\n")
  cat("- Number of stocks:", ncol(data$stock_returns), "\n")
  cat("- Date range:", as.character(first(index(data$stock_returns))), 
      "to", as.character(last(index(data$stock_returns))), "\n")
  cat("- Number of observations:", nrow(data$stock_returns), "\n")
  
  # Check for NA values
  na_count <- sum(is.na(data$stock_returns))
  if(na_count > 0) {
    cat("- WARNING: Found", na_count, "NA values in the returns data\n")
  }
  cat("\n")
  
  # Run the portfolio construction with progress tracking
  cat("Constructing 1/N portfolio...\n")
  portfolio_result <- construct_1n_portfolio(
    asset_returns = data$stock_returns,
    window_size = 36  # 3 years of monthly data
  )
  
  # Analyze performance
  cat("\nAnalyzing portfolio performance...\n")
  analysis <- analyze_portfolio(
    portfolio_result = portfolio_result, 
    risk_free_rate = data$risk_free_rate[(36+1):length(data$risk_free_rate)],
    sp500_returns = data$sp500_returns
  )
  
  # Plot the portfolio returns alongside the benchmark
  portfolio_vs_benchmark <- merge(
    Portfolio = cumprod(1 + portfolio_result$returns) - 1, 
    Benchmark = cumprod(1 + data$sp500_returns[(36+1):length(data$sp500_returns)]) - 1
  )
  
  # Create a plot using base R graphics only to avoid PerformanceAnalytics issues
  plot.zoo(portfolio_vs_benchmark, 
       main = "1/N Portfolio vs. S&P 500", 
       col = c("blue", "red"),
       lwd = c(2, 2),
       plot.type = "single",
       xlab = "Date",
       ylab = "Cumulative Return")
  
  # Add a legend manually
  legend("topleft", 
         legend = c("1/N Portfolio", "S&P 500"),
         col = c("blue", "red"),
         lwd = c(2, 2),
         bg = "white")
  
  # Save the plot using only base R functions to avoid PerformanceAnalytics compatibility issues
  output_file <- file.path(base_dir, "1n_portfolio_vs_benchmark.png")
  png(output_file, width = 1200, height = 800, res = 120)
  par(mar = c(5, 4, 4, 4) + 0.1)  # Increase margins for better visibility
  
  # Use plot.zoo instead of PerformanceAnalytics functions
  plot.zoo(portfolio_vs_benchmark, 
           main = "1/N Portfolio vs. S&P 500",
           col = c("blue", "red"),
           lwd = c(2, 2),
           plot.type = "single",
           xlab = "Date",
           ylab = "Cumulative Return")
  
  # Add grid lines manually
  grid(lty = "dotted", col = "lightgray")
  
  # Add legend manually
  legend("topleft", 
         legend = c("1/N Portfolio", "S&P 500"),
         col = c("blue", "red"),
         lwd = c(2, 2),
         bg = "white")
         
  dev.off()
  cat("Saved portfolio vs benchmark plot to:", output_file, "\n")
  
  # End timer
  end_time <- Sys.time()
  cat("\nAnalysis completed in", round(difftime(end_time, start_time, units = "mins"), 2), "minutes\n")
  
  # Return results
  return(list(
    portfolio = portfolio_result,
    analysis = analysis,
    data = data
  ))
}

# Execute the analysis and store the results
result <- run_analysis()
## Starting 1/N portfolio analysis at 2025-05-04 11:58:10.571428 
## Importing data from Excel files...
## Reading stock data from: C:/Users/lcyep/OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey/Tec/Semestre 6/Risk/R, project/data.xlsx
## New names:
## • `` -> `...1`
## Reading market data from: C:/Users/lcyep/OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey/Tec/Semestre 6/Risk/R, project/data2.xlsx
## New names:
## • `` -> `...1`
## 
## Data summary:
## - Number of stocks: 50 
## - Date range: 2005-01-31 to 2024-02-29 
## - Number of observations: 230 
## 
## Constructing 1/N portfolio...
## Starting 1/N portfolio construction with 194 investment periods
## Initial portfolio has 50 equally weighted assets
## Processing period 1 of 194 - Date: 2008-01-31 
## Processing period 20 of 194 - Date: 2009-08-31 
## Processing period 40 of 194 - Date: 2011-04-29 
## Processing period 60 of 194 - Date: 2012-12-31 
## Processing period 80 of 194 - Date: 2014-08-29 
## Processing period 100 of 194 - Date: 2016-04-29 
## Processing period 120 of 194 - Date: 2017-12-29 
## Processing period 140 of 194 - Date: 2019-08-30 
## Processing period 160 of 194 - Date: 2021-04-30 
## Processing period 180 of 194 - Date: 2022-12-30 
## Processing period 194 of 194 - Date: 2024-02-29 
## Saved portfolio weights to: C:/Users/lcyep/OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey/Tec/Semestre 6/Risk/R, project/1n_portfolio_weights.csv 
## 
## Analyzing portfolio performance...
## 
## ----- Portfolio Performance Metrics -----
##                   Metric Portfolio Benchmark
## 1         Average Return    13.03%     9.04%
## 2     Standard Deviation    14.92%    16.18%
## 3           Sharpe Ratio     0.233     0.144
## 4 Upside-Potential Ratio     0.530     0.465
## Saved performance comparison to: C:/Users/lcyep/OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey/Tec/Semestre 6/Risk/R, project/1n_performance_comparison.csv 
## Saved portfolio returns to: C:/Users/lcyep/OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey/Tec/Semestre 6/Risk/R, project/1n_portfolio_returns.csv

## Saved performance table image to: C:/Users/lcyep/OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey/Tec/Semestre 6/Risk/R, project/1n_performance_table.png

## Saved portfolio vs benchmark plot to: C:/Users/lcyep/OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey/Tec/Semestre 6/Risk/R, project/1n_portfolio_vs_benchmark.png 
## 
## Analysis completed in 0.01 minutes
# Display a final message with key findings
cat("\n===== 1/N PORTFOLIO ANALYSIS SUMMARY =====\n")
## 
## ===== 1/N PORTFOLIO ANALYSIS SUMMARY =====
cat("Portfolio Strategy: 1/N equally weighted portfolio of all assets\n")
## Portfolio Strategy: 1/N equally weighted portfolio of all assets
cat("Initial weighing after 36 months, No rebalancing afterward\n\n")
## Initial weighing after 36 months, No rebalancing afterward
cat("Key Performance Metrics:\n")
## Key Performance Metrics:
cat("- Portfolio Average Return: ", sprintf("%.2f%%", mean(result$portfolio$returns) * 12 * 100), 
    " vs. S&P 500: ", sprintf("%.2f%%", mean(result$data$sp500_returns[(36+1):length(result$data$sp500_returns)]) * 12 * 100), "\n", sep="")
## - Portfolio Average Return: 13.03% vs. S&P 500: 9.04%
cat("- Portfolio Standard Deviation: ", sprintf("%.2f%%", sd(result$portfolio$returns) * sqrt(12) * 100), 
    " vs. S&P 500: ", sprintf("%.2f%%", sd(result$data$sp500_returns[(36+1):length(result$data$sp500_returns)]) * sqrt(12) * 100), "\n", sep="")
## - Portfolio Standard Deviation: 14.92% vs. S&P 500: 16.18%
cat("- Portfolio Sharpe Ratio: ", sprintf("%.3f", mean(result$portfolio$returns - result$data$risk_free_rate[(36+1):length(result$data$risk_free_rate)]) / sd(result$portfolio$returns)), 
    " vs. S&P 500: ", sprintf("%.3f", mean(result$data$sp500_returns[(36+1):length(result$data$sp500_returns)] - result$data$risk_free_rate[(36+1):length(result$data$risk_free_rate)]) / sd(result$data$sp500_returns[(36+1):length(result$data$sp500_returns)])), "\n", sep="")
## - Portfolio Sharpe Ratio: 0.233 vs. S&P 500: 0.144