# Install and load necessary packages
install.packages("quantmod")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("PerformanceAnalytics")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(dplyr)
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first()  masks xts::first()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::last()   masks xts::last()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(PerformanceAnalytics)
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
#' Fetch price history for one ticker and compute its daily returns
#'
#' Downloads data with `getSymbols()` from the "yahoo" source, takes the
#' closing-price column with `Cl()` and converts it to daily returns with
#' `dailyReturn()`. The result is a series of *daily* returns; compounding
#' them into cumulative returns is left to the caller.
#'
#' @param ticker Symbol to download, passed to `getSymbols()`.
#' @param start_date Start of the date range (the `from` argument).
#' @param end_date End of the date range (the `to` argument).
#' @return The object returned by `dailyReturn()`, or `NULL` if the download
#'   failed. A failed download raises a warning naming the ticker and the
#'   underlying error message, so missing tickers are not dropped silently.
calculate_stock_returns <- function(ticker, start_date, end_date) {
  stock_data <- tryCatch(
    getSymbols(
      ticker,
      src = "yahoo",
      from = start_date,
      to = end_date,
      auto.assign = FALSE
    ),
    error = function(e) {
      # Report the failure instead of swallowing it; callers still get NULL.
      warning(
        "Could not download data for '", ticker, "': ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )

  if (is.null(stock_data)) {
    return(NULL)
  }

  dailyReturn(Cl(stock_data))
}

# Example data for Pelosi's trades (replace with actual data)
pelosi_trades <- data.frame(
  politician = "Nancy Pelosi",
  ticker = c("AAPL", "NVDA", "PYPL"),
  transaction_date = as.Date(c("2021-12-31", "2022-12-20", "2023-12-28")) # Adjusted for past data
)

# Overall date range, used for the benchmark series
start_date <- min(pelosi_trades$transaction_date)
end_date <- Sys.Date()

# Fetch returns for each traded stock, starting at that stock's own first
# transaction date. Starting every ticker at the earliest date in the table
# would credit the portfolio with returns on stocks before they were bought.
# unique() prevents a ticker traded more than once from being fetched (and
# weighted) twice.
tickers <- unique(pelosi_trades$ticker)
stock_returns_list <- lapply(tickers, function(ticker) {
  is_ticker <- pelosi_trades$ticker == ticker
  entry_date <- min(pelosi_trades$transaction_date[is_ticker])
  calculate_stock_returns(ticker, entry_date, end_date)
})
names(stock_returns_list) <- tickers

# Drop tickers whose download failed (calculate_stock_returns returned NULL)
download_failed <- vapply(stock_returns_list, is.null, logical(1))
stock_returns_list <- stock_returns_list[!download_failed]
if (length(stock_returns_list) == 0) {
  stop("No price data could be downloaded for any traded ticker.", call. = FALSE)
}

# Merge stock returns into one dataset (one column per ticker; dates on which
# a ticker has no data are NA)
merged_returns <- do.call(merge, stock_returns_list)

# Portfolio return: equal-weighted average of the stocks that have a return on
# each date (NA entries are skipped)
pelosi_portfolio_return <- rowMeans(merged_returns, na.rm = TRUE)

# Fetch S&P 500 returns as benchmark
sp500_data <- getSymbols(
  "^GSPC",
  src = "yahoo",
  from = start_date,
  to = end_date,
  auto.assign = FALSE
)
sp500_returns <- dailyReturn(Cl(sp500_data))

# Merge the portfolio returns with the S&P 500 returns, keeping only dates
# present in both series
portfolio_xts <- xts(pelosi_portfolio_return, order.by = index(merged_returns))
all_returns <- merge(portfolio_xts, sp500_returns, all = FALSE)
colnames(all_returns) <- c("Pelosi Portfolio", "S&P 500")

# Compound the daily returns into cumulative returns
cumulative_returns <- cumprod(1 + all_returns) - 1

# Convert to a long data frame for ggplot. as.numeric() strips the xts
# structure so the column names below are the ones that end up in the legend.
returns_df <- data.frame(
  Date = index(cumulative_returns),
  "Pelosi Portfolio" = as.numeric(cumulative_returns[, "Pelosi Portfolio"]),
  "S&P 500" = as.numeric(cumulative_returns[, "S&P 500"]),
  check.names = FALSE
) %>%
  pivot_longer(
    cols = -Date,
    names_to = "Investor",
    values_to = "Cumulative_Return"
  )

# Plot Pelosi's cumulative return vs S&P 500
ggplot(returns_df, aes(x = Date, y = Cumulative_Return, color = Investor)) +
  geom_line(linewidth = 1.2) +
  labs(
    title = "Nancy Pelosi's Portfolio Performance vs S&P 500",
    x = "Date",
    y = "Cumulative Return",
    color = "Investor"
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.