Iterative Stock Visualizations

fin_query <- sprintf("SELECT StockTicker, NetInterestIncome, InterestExpense, InterestIncome, NormalizedIncome, TotalExpenses, ReportedOperatingIncome, StockholderNetIncome, NetIncome, TaxProvision, PretaxIncome, OperatingIncome, OperatingExpense,   ResearchAndDevelopment, GrossProfit, CostOfRevenue, TotalRevenue, OperatingRevenue
FROM financials")
financials <- dbGetQuery(conn, fin_query)

# Line plots for each stock with highest prices and lowest prices each week
for (ticker in stock_tickers) {
  price_query <- sprintf("SELECT StockTicker, High, Low, Date FROM history WHERE StockTicker = '%s';", ticker)
  price <- dbGetQuery(conn, price_query)
  
  price$Date <- as.Date(price$Date)
  
  line <- ggplot(price, aes(x = Date)) +
  geom_line(aes(y = High), color = "darkgreen", linewidth = 0.5) +
  geom_line(aes(y = Low), color = "darkred", linewidth = 0.5) +
  labs(
    title = sprintf("%s Price Over Time", ticker),
    x = "Date",
    y = "Price(USD)"
  ) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  print(line)
  cat("Green represents a stock's highest share price each day for the past year")
  cat("\n")
  cat("Red represents a stock's lowest share price each day for the past year")
  
################################################################################################################
  # Scatter plots of trading volume for each stock
  volume_query <- sprintf("SELECT StockTicker, Date, Volume FROM history WHERE StockTicker = '%s';", ticker)
  volume <- dbGetQuery(conn, volume_query)
  
  volume$Date <- as.Date(volume$Date)
  volume$VolumeMillions <- volume$Volume / 1e6
  volume$Volume <- NULL
  
  scatter <- ggplot(volume, aes(x = Date, y = VolumeMillions)) +
  geom_point(color = "black", alpha = 0.7) +
  labs(title = sprintf("%s Volume ScatterPlot", ticker),
       x = "Date",
       y = "Trading Volume (Millions)") +
  scale_x_date(date_breaks = "1 month", date_labels = "%d-%b") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  print(scatter)
  
############################################################################################################
  # Scatter plots with linear regression line
  volume_query <- sprintf("SELECT StockTicker, Date, Volume FROM history WHERE StockTicker = '%s';", ticker)
  volume <- dbGetQuery(conn, volume_query)
  
  volume$Date <- as.Date(volume$Date)
  volume$VolumeMillions <- volume$Volume / 1e6
  volume$Volume <- NULL
  
  # Linear Regression on each scatterplot
  volume$DateNumeric <- as.numeric(volume$Date)
  model <- lm(VolumeMillions ~ DateNumeric, data = volume)
  volume$Predicted <- predict(model)
  
  scatter <- ggplot(volume, aes(x = Date, y = VolumeMillions)) +
  geom_point(color = "black", alpha = 0.7) +
  geom_line(aes(y = Predicted), color = "red", linewidth = 1) +
  labs(title = sprintf("%s Volume ScatterPlot w/ LR", ticker),
       x = "Date",
       y = "Trading Volume (Millions)") +
  scale_x_date(date_breaks = "1 month", date_labels = "%d-%b") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  print(scatter)
  
  # Next day trading volume prediction
  date <- "2025-04-29"
  future_date <- as.numeric(as.Date(date))
  predicted_volume <- predict(model, newdata = data.frame(DateNumeric = future_date))
  cat(sprintf("Predicted trading volume for %s:", date), predicted_volume)
  cat("\n")
}

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year

## Predicted trading volume for 2025-04-29: 53.58444

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year

## Predicted trading volume for 2025-04-29: 24.95598

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year

## Predicted trading volume for 2025-04-29: 222.9994

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year

## Predicted trading volume for 2025-04-29: 32.91633

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year

## Predicted trading volume for 2025-04-29: 11.93133

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year

## Predicted trading volume for 2025-04-29: 6.412213

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year

## Predicted trading volume for 2025-04-29: 22.91701

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year

## Predicted trading volume for 2025-04-29: 4.845563

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year

## Predicted trading volume for 2025-04-29: 3.172904

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year

## Predicted trading volume for 2025-04-29: 4.238243

PCA Clustering Using Financials

# Use mean imputation to replace missing values in financials data frame
fins_imputed <- financials %>%
  mutate(across(where(is.numeric), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))

# Aggregate data for each company (Average values over time)
fins_summary <- fins_imputed %>%
  group_by(StockTicker) %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)))

# Principal Component Analysis
pca_result <- prcomp(fins_summary %>% select(-StockTicker), scale. = TRUE)

# Dataframe of PCA results
pca_df <- as.data.frame(pca_result$x)
pca_df$StockTicker <- fins_summary$StockTicker

# Plot PCA
ggplot(pca_df, aes(x = PC1, y = PC2)) +
  geom_point(alpha = 0.7, color = "darkblue") +
  geom_text_repel(aes(label = StockTicker), size = 4, box.padding = 0.35, point.padding = 0.3) +
  labs(title = "PCA of Company Financial Data",
       x = "Company Size",
       y = "Operational Efficiency"
  ) +
  coord_cartesian(ylim = c(-3, 3), xlim = c(-5, 10)) +
  theme_minimal()