Iterative Stock Visualizations
fin_query <- sprintf("SELECT StockTicker, NetInterestIncome, InterestExpense, InterestIncome, NormalizedIncome, TotalExpenses, ReportedOperatingIncome, StockholderNetIncome, NetIncome, TaxProvision, PretaxIncome, OperatingIncome, OperatingExpense, ResearchAndDevelopment, GrossProfit, CostOfRevenue, TotalRevenue, OperatingRevenue
FROM financials")
financials <- dbGetQuery(conn, fin_query)
# Line plots for each stock with highest prices and lowest prices each week
for (ticker in stock_tickers) {
price_query <- sprintf("SELECT StockTicker, High, Low, Date FROM history WHERE StockTicker = '%s';", ticker)
price <- dbGetQuery(conn, price_query)
price$Date <- as.Date(price$Date)
line <- ggplot(price, aes(x = Date)) +
geom_line(aes(y = High), color = "darkgreen", linewidth = 0.5) +
geom_line(aes(y = Low), color = "darkred", linewidth = 0.5) +
labs(
title = sprintf("%s Price Over Time", ticker),
x = "Date",
y = "Price(USD)"
) +
scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(line)
cat("Green represents a stock's highest share price each day for the past year")
cat("\n")
cat("Red represents a stock's lowest share price each day for the past year")
################################################################################################################
# Scatter plots of trading volume for each stock
volume_query <- sprintf("SELECT StockTicker, Date, Volume FROM history WHERE StockTicker = '%s';", ticker)
volume <- dbGetQuery(conn, volume_query)
volume$Date <- as.Date(volume$Date)
volume$VolumeMillions <- volume$Volume / 1e6
volume$Volume <- NULL
scatter <- ggplot(volume, aes(x = Date, y = VolumeMillions)) +
geom_point(color = "black", alpha = 0.7) +
labs(title = sprintf("%s Volume ScatterPlot", ticker),
x = "Date",
y = "Trading Volume (Millions)") +
scale_x_date(date_breaks = "1 month", date_labels = "%d-%b") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(scatter)
############################################################################################################
# Scatter plots with linear regression line
volume_query <- sprintf("SELECT StockTicker, Date, Volume FROM history WHERE StockTicker = '%s';", ticker)
volume <- dbGetQuery(conn, volume_query)
volume$Date <- as.Date(volume$Date)
volume$VolumeMillions <- volume$Volume / 1e6
volume$Volume <- NULL
# Linear Regression on each scatterplot
volume$DateNumeric <- as.numeric(volume$Date)
model <- lm(VolumeMillions ~ DateNumeric, data = volume)
volume$Predicted <- predict(model)
scatter <- ggplot(volume, aes(x = Date, y = VolumeMillions)) +
geom_point(color = "black", alpha = 0.7) +
geom_line(aes(y = Predicted), color = "red", linewidth = 1) +
labs(title = sprintf("%s Volume ScatterPlot w/ LR", ticker),
x = "Date",
y = "Trading Volume (Millions)") +
scale_x_date(date_breaks = "1 month", date_labels = "%d-%b") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(scatter)
# Next day trading volume prediction
date <- "2025-04-29"
future_date <- as.numeric(as.Date(date))
predicted_volume <- predict(model, newdata = data.frame(DateNumeric = future_date))
cat(sprintf("Predicted trading volume for %s:", date), predicted_volume)
cat("\n")
}

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year


## Predicted trading volume for 2025-04-29: 53.58444

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year


## Predicted trading volume for 2025-04-29: 24.95598

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year


## Predicted trading volume for 2025-04-29: 222.9994

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year


## Predicted trading volume for 2025-04-29: 32.91633

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year


## Predicted trading volume for 2025-04-29: 11.93133

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year


## Predicted trading volume for 2025-04-29: 6.412213

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year


## Predicted trading volume for 2025-04-29: 22.91701

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year


## Predicted trading volume for 2025-04-29: 4.845563

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year


## Predicted trading volume for 2025-04-29: 3.172904

## Green represents a stock's highest share price each day for the past year
## Red represents a stock's lowest share price each day for the past year


## Predicted trading volume for 2025-04-29: 4.238243
PCA Clustering Using Financials
# Use mean imputation to replace missing values in financials data frame
fins_imputed <- financials %>%
mutate(across(where(is.numeric), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))
# Aggregate data for each company (Average values over time)
fins_summary <- fins_imputed %>%
group_by(StockTicker) %>%
summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)))
# Principal Component Analysis
pca_result <- prcomp(fins_summary %>% select(-StockTicker), scale. = TRUE)
# Dataframe of PCA results
pca_df <- as.data.frame(pca_result$x)
pca_df$StockTicker <- fins_summary$StockTicker
# Plot PCA
ggplot(pca_df, aes(x = PC1, y = PC2)) +
geom_point(alpha = 0.7, color = "darkblue") +
geom_text_repel(aes(label = StockTicker), size = 4, box.padding = 0.35, point.padding = 0.3) +
labs(title = "PCA of Company Financial Data",
x = "Company Size",
y = "Operational Efficiency"
) +
coord_cartesian(ylim = c(-3, 3), xlim = c(-5, 10)) +
theme_minimal()
