# Load necessary libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(packcircles)
library(readxl)

# Load crash data
crash_data <- read_excel("crash.xlsx")

# Drop records whose State is the "-" placeholder
# (filter() also drops rows where State is NA)
crash_data <- crash_data %>% filter(State != "-")

# Tally records per state, largest first.
# NOTE(review): this counts rows, not the sum of a deaths column, even though
# the result is labelled "Total Deaths" -- confirm that one row == one death.
state_crash_counts <- crash_data %>%
  count(State, name = "Total_Deaths", sort = TRUE)

# Compute a packed-circle layout; circle area is proportional to the tally
packing <- circleProgressiveLayout(
  state_crash_counts$Total_Deaths,
  sizetype = "area"
)

# Attach circle centres (x, y) and radius to the per-state table
state_crash_counts <- cbind(state_crash_counts, packing)

# Generate the polygon vertices that outline each circle
dat.gg <- circleLayoutVertices(packing, npoints = 100)

# Carry each circle's tally onto its vertices so the fill is mapped from the
# plotted data frame instead of indexing an outside vector within aes()
dat.gg$Total_Deaths <- state_crash_counts$Total_Deaths[dat.gg$id]

# Packed bubble chart with a yellow-to-red fill
ggplot() +
  geom_polygon(
    data = dat.gg,
    aes(x, y, group = id, fill = Total_Deaths),
    colour = "black", alpha = 0.8
  ) +
  geom_text(
    data = state_crash_counts,
    aes(x, y, label = State),
    size = 4, fontface = "bold", color = "white"
  ) +
  scale_fill_gradient(name = "Total Deaths", low = "yellow", high = "red") +
  coord_equal() +  # 1:1 aspect ratio so the circles are not stretched
  theme_void() +
  theme(
    legend.position = "right",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
  ) +
  ggtitle("Tesla Fatal Crashes by State")

# Load necessary libraries
library(ggplot2)
library(dplyr)
library(usmap)
library(readxl)

# Read the crash records
crash_data <- read_excel("crash.xlsx")

# Keep only records with a real state value ("-" marks a missing state)
crash_data <- filter(crash_data, State != "-")

# Tally records per state, then rename the key column to `state`,
# which is the name the usmap plotting call below is given data under
state_crash_counts <- crash_data %>%
  count(State, name = "Total_Deaths") %>%
  rename(state = State)

# Draw the U.S. choropleth, shading each state by its tally
plot_usmap(
  regions = "states",
  data = state_crash_counts,
  values = "Total_Deaths"
) +
  scale_fill_gradient(low = "yellow", high = "red", name = "Total Deaths") +
  ggtitle("Fatal Tesla Crashes by U.S. State") +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    legend.position = "right"
  )
## Warning in CPL_transform(x, crs, aoi, pipeline, reverse, desired_accuracy, :
## GDAL Error 1: PROJ: proj_create_from_database: crs not found
## Warning in CPL_transform(x, crs, aoi, pipeline, reverse, desired_accuracy, :
## GDAL Error 1: PROJ: proj_create_from_database: crs not found
## Warning in CPL_transform(x, crs, aoi, pipeline, reverse, desired_accuracy, :
## GDAL Error 1: PROJ: proj_create_from_database: crs not found
## Warning in CPL_transform(x, crs, aoi, pipeline, reverse, desired_accuracy, :
## GDAL Error 1: PROJ: proj_create_from_database: crs not found
## Warning in CPL_transform(x, crs, aoi, pipeline, reverse, desired_accuracy, :
## GDAL Error 1: PROJ: proj_create_from_database: crs not found
## Warning in CPL_transform(x, crs, aoi, pipeline, reverse, desired_accuracy, :
## GDAL Error 1: PROJ: proj_create_from_database: crs not found
## Warning in CPL_transform(x, crs, aoi, pipeline, reverse, desired_accuracy, :
## GDAL Error 1: PROJ: proj_create_from_database: crs not found
## Warning in CPL_transform(x, crs, aoi, pipeline, reverse, desired_accuracy, :
## GDAL Error 1: PROJ: proj_create_from_database: crs not found

# Load necessary libraries
library(ggplot2)
library(dplyr)
library(rnaturalearth)
library(rnaturalearthdata)
## 
## Attaching package: 'rnaturalearthdata'
## The following object is masked from 'package:rnaturalearth':
## 
##     countries110
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE
# Load crash data
crash_data <- read_excel("crash.xlsx")

# Tally records per country
country_crash_counts <- crash_data %>%
  group_by(Country) %>%
  summarise(Total_Deaths = n())

# Load world map polygons as an sf object
world <- ne_countries(scale = "medium", returnclass = "sf")

# The join below matches on exact country names, so any spelling that differs
# from the map's `name` column would silently vanish from the plot.
# Surface those instead of dropping them quietly.
unmatched_countries <- setdiff(country_crash_counts$Country, world$name)
if (length(unmatched_countries) > 0) {
  warning(
    "Countries in crash data with no match in the world map: ",
    paste(unmatched_countries, collapse = ", "),
    call. = FALSE
  )
}

# Attach the tallies to the map; countries without records keep NA
world_crash_map <- world %>%
  left_join(country_crash_counts, by = c("name" = "Country"))

# World choropleth: NA countries are drawn in light gray
ggplot(world_crash_map) +
  # Border width is controlled by `linewidth` (ggplot2 >= 3.4.0);
  # `size` no longer applies to polygon outlines
  geom_sf(aes(fill = Total_Deaths), color = "black", linewidth = 0.1) +
  scale_fill_gradient(low = "yellow", high = "orangered3", na.value = "gray90") +
  labs(
    title = "Tesla-Related Fatal Crashes by Country",
    fill = "Total Deaths"
  ) +
  theme_void() +
  theme(legend.position = "bottom")

# Load necessary libraries
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
library(readxl)

# Load crash data
crash_data <- read_excel("crash.xlsx")

# Tally records per country, largest first
country_crash_counts <- crash_data %>%
  group_by(Country) %>%
  summarise(Total_Deaths = n()) %>%
  arrange(desc(Total_Deaths))

# Keep the top `threshold` countries and lump the rest into "Other".
# head()/tail() are used instead of 1:threshold and (threshold + 1):nrow()
# so that a table with `threshold` or fewer countries does not produce NA rows
# or a reversed index range.
threshold <- 5
top_countries <- head(country_crash_counts, threshold)
remaining_counts <- tail(country_crash_counts$Total_Deaths, -threshold)
other_countries <- sum(remaining_counts, na.rm = TRUE)

# Build the plotting table; add the "Other" slice only when something is left
simplified_data <- top_countries
if (length(remaining_counts) > 0) {
  simplified_data <- bind_rows(
    simplified_data,
    tibble(Country = "Other", Total_Deaths = other_countries)
  )
}

# One colour per slice, interpolated from yellow through red to dark red
slice_colors <- colorRampPalette(c("yellow", "red", "darkred"))(
  nrow(simplified_data)
)

# Interactive pie chart using Plotly
fig <- plot_ly(
  simplified_data,
  labels = ~Country,
  values = ~Total_Deaths,
  type = "pie",
  textinfo = "label+percent",
  marker = list(colors = slice_colors),
  hole = 0,    # No hole (full pie)
  pull = 0.05  # Slightly separate slices for better visibility
) %>%
  layout(
    title = "🚗 Tesla-Related Fatal Crashes by Country (3D Pie Chart - Simplified)",
    showlegend = TRUE
  )

# Show the interactive chart
fig
# Load necessary libraries
library(ggplot2)
library(dplyr)
library(readxl)
library(tidyr)  # Load tidyr for replace_na()

# Load crash data and total the deaths per calendar day.
# Date is converted BEFORE grouping so that records carrying different
# timestamps on the same day are aggregated into one row.
crash_data <- read_excel("crash.xlsx") %>%
  mutate(Date = as.Date(Date)) %>%
  group_by(Date) %>%
  summarise(Total_Deaths = sum(Deaths, na.rm = TRUE))

# Load Tesla stock data, exposing the closing price as Stock_Price
tsla_stock <- read_excel("Tsla .xlsx") %>%
  rename(Stock_Price = `Close/Last`) %>%
  mutate(Date = as.Date(Date))

# Attach the closing price to every crash day. Crash days with no matching
# stock row keep Stock_Price = NA and are dropped by ggplot with a warning.
# Total_Deaths can never be NA here (crash data is the left table and the sum
# uses na.rm = TRUE), so no NA replacement is needed.
merged_data <- left_join(crash_data, tsla_stock, by = "Date")

# Pearson correlation over rows where both values are present
deaths_price_cor <- cor(
  merged_data$Total_Deaths,
  merged_data$Stock_Price,
  use = "complete.obs"
)

# Axis limits derived from the data
x_upper <- max(merged_data$Total_Deaths, na.rm = TRUE) * 0.8
y_lower <- min(merged_data$Stock_Price, na.rm = TRUE) * 0.9
y_upper <- max(merged_data$Stock_Price, na.rm = TRUE) * 1.1

# Scatter plot with a linear regression line.
# NOTE(review): scale limits discard points outside the range before the
# regression is fitted, whereas the correlation in the title uses every
# complete row -- confirm that trimming at 80% of the maximum is intended.
ggplot(merged_data, aes(x = Total_Deaths, y = Stock_Price)) +
  geom_jitter(color = "blue", size = 2, alpha = 0.6, width = 0.2) +
  geom_smooth(method = "lm", color = "red", se = TRUE) +
  scale_x_continuous(limits = c(0, x_upper)) +
  scale_y_continuous(limits = c(y_lower, y_upper)) +
  labs(
    title = paste(
      "Tesla-Related Deaths vs. Stock Price\nCorrelation:",
      round(deaths_price_cor, 3)
    ),
    x = "Daily Tesla-Related Deaths",
    y = "Tesla Stock Price (Close/Last)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 14)
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 33 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 33 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Load necessary libraries
library(ggplot2)
library(readxl)
library(dplyr)

# Read the crash records
crash_data <- read_excel("crash.xlsx")

# Recode the autopilot column in two passes:
#   "-"        -> "Non-autonomous driver"
#   "1" or "3" -> "Autopilot Confirmed"
# Missing values are left as NA and any other value is kept unchanged.
autopilot_status <- crash_data$`Verified Tesla Autopilot Deaths`
autopilot_status[autopilot_status %in% "-"] <- "Non-autonomous driver"
autopilot_status[autopilot_status %in% c("1", "3")] <- "Autopilot Confirmed"
crash_data$`Verified Tesla Autopilot Deaths` <- autopilot_status

# Count records per category (NA excluded), then derive each category's
# share of the total and a two-line "name / percent" label
death_counts <- crash_data %>%
  filter(!is.na(`Verified Tesla Autopilot Deaths`)) %>%
  count(`Verified Tesla Autopilot Deaths`) %>%
  mutate(
    percentage = (n / sum(n)) * 100,
    label = paste0(
      `Verified Tesla Autopilot Deaths`, "\n", round(percentage, 1), "%"
    )
  )

# Pie chart: a single stacked bar wrapped into polar coordinates
# (the bar has full width, so no hole is drawn in the middle)
ggplot(
  death_counts,
  aes(x = "", y = percentage, fill = factor(`Verified Tesla Autopilot Deaths`))
) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),
    size = 5
  ) +
  scale_fill_manual(
    values = c(
      "Autopilot Confirmed" = "lightblue",
      "Non-autonomous driver" = "orange"
    )
  ) +
  theme_void() +
  labs(
    title = "Tesla Autopilot vs Non-Autopilot Fatalities",
    fill = "Category"
  )

library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Download TSLA price history for 2024 from Yahoo Finance; getSymbols()
# stores the result in an object named `TSLA` and returns the symbol name
getSymbols(
  "TSLA",
  src = "yahoo",
  from = "2024-01-01",
  to = "2024-12-31"
)
## [1] "TSLA"

# Keep only the closing-price column, relabelled as TSLA
closing_prices <- data.frame(TSLA = TSLA$TSLA.Close)
stocks <- as.xts(closing_prices)

# Draw the closing price as a single-panel line chart
plot(
  as.zoo(stocks),
  screens = 1,
  lty = c(1, 3),
  col = "blue",
  xlab = "date",
  ylab = "Price"
)

# Legend at the top of the plot
legend(
  "top",
  legend = "TSLA",
  lty = c(1, 3),
  col = "blue",
  cex = 0.5
)

library(ggplot2)
library(readxl)
library(dplyr)
library(lubridate)

# Load the crash dataset
data <- read_excel("crash.xlsx")

# Convert Date column to Date format
data$Date <- as.Date(data$Date)

# Bucket every record into the first day of its month
data$Month <- floor_date(data$Date, "month")

# Total deaths per month (missing death counts are ignored)
monthly_deaths <- data %>%
  group_by(Month) %>%
  summarize(Total_Deaths = sum(Deaths, na.rm = TRUE))

# Line graph of monthly deaths with a point at each month
ggplot(monthly_deaths, aes(x = Month, y = Total_Deaths)) +
  # `linewidth` replaces the `size` aesthetic for lines,
  # which is deprecated since ggplot2 3.4.0
  geom_line(color = "blue", linewidth = 1) +
  geom_point(color = "red", size = 2) +
  labs(
    title = "Monthly Tesla-Related Deaths",
    x = "Month",
    y = "Total Deaths"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Load necessary libraries
library(ggplot2)
library(readxl)
library(dplyr)
library(lubridate)

# Load the stock dataset (note the space in the file name)
data <- read_excel("Tsla .xlsx")

# Convert Date column to Date format
data$Date <- as.Date(data$Date)

# Bucket every row into the first day of its month
data$Month <- floor_date(data$Date, "month")

# Ensure the closing price column is numeric
data$`Close/Last` <- as.numeric(data$`Close/Last`)

# Average closing price per month. The column keeps its original name
# Total_StockPrice, but it holds a mean, not a sum.
monthly_StockPrice <- data %>%
  group_by(Month) %>%
  summarize(Total_StockPrice = mean(`Close/Last`, na.rm = TRUE)) %>%
  ungroup()

# Line graph of the monthly average with a point at each month
ggplot(monthly_StockPrice, aes(x = Month, y = Total_StockPrice)) +
  # `linewidth` replaces the `size` aesthetic for lines,
  # which is deprecated since ggplot2 3.4.0
  geom_line(color = "blue", linewidth = 1) +
  geom_point(color = "red", size = 2) +
  labs(
    title = "Monthly Tesla Stock Price",
    x = "Month",
    y = "Average Stock Price"
  ) +
  theme_minimal()

# Load necessary libraries
library(ggplot2)
library(readxl)
library(dplyr)
library(lubridate)
library(scales)

# Load datasets
tsla_data <- read_excel("Tsla .xlsx")
crash_data <- read_excel("crash.xlsx")

# Convert Date column to Date format
tsla_data$Date <- as.Date(tsla_data$Date)
crash_data$Date <- as.Date(crash_data$Date)

# Bucket every row into the first day of its month
tsla_data$Month <- floor_date(tsla_data$Date, "month")
crash_data$Month <- floor_date(crash_data$Date, "month")

# Ensure numeric values
tsla_data$`Close/Last` <- as.numeric(tsla_data$`Close/Last`)
crash_data$Deaths <- as.numeric(crash_data$Deaths)

# Average closing price per month
monthly_stock <- tsla_data %>%
  group_by(Month) %>%
  summarize(Average_Stock_Price = mean(`Close/Last`, na.rm = TRUE))

# Total deaths per month
monthly_deaths <- crash_data %>%
  group_by(Month) %>%
  summarize(Total_Deaths = sum(Deaths, na.rm = TRUE))

# Line graph: monthly average stock price with a dashed loess trend
ggplot(monthly_stock, aes(x = Month, y = Average_Stock_Price)) +
  # `linewidth` replaces the `size` aesthetic for lines,
  # which is deprecated since ggplot2 3.4.0
  geom_line(color = "darkblue", linewidth = 1.2, linetype = "solid") +
  geom_point(color = "red", size = 3, shape = 19) +
  geom_smooth(
    method = "loess", se = FALSE,
    color = "orange", linetype = "dashed"
  ) +
  labs(
    title = "📈 Tesla Monthly Stock Prices",
    subtitle = "A visualization of Tesla stock trends",
    x = "Month",
    y = "Average Stock Price ($)"
  ) +
  theme_minimal() +
  # Documented date-scale arguments: a tick every two months, "Jan 2024" style
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y")
## `geom_smooth()` using formula = 'y ~ x'

# Bar graph: total deaths per month, bars shaded yellow-to-red by height
ggplot(
  data = monthly_deaths,
  mapping = aes(x = Month, y = Total_Deaths, fill = Total_Deaths)
) +
  geom_col(color = "black", show.legend = FALSE) +
  scale_fill_gradient(low = "yellow", high = "red") +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y") +
  ggtitle(
    "🚗 Tesla-Related Deaths Per Month",
    subtitle = "Crash fatalities involving Tesla vehicles"
  ) +
  xlab("Month") +
  ylab("Total Deaths") +
  theme_minimal() +
  # Tilt the month labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Load necessary libraries
library(ggplot2)
library(readxl)
library(dplyr)
library(lubridate)
library(scales)
library(patchwork)  # For side-by-side plots

# Load datasets
tsla_data <- read_excel("Tsla .xlsx")
crash_data <- read_excel("crash.xlsx")

# Convert Date column to Date format
tsla_data$Date <- as.Date(tsla_data$Date)
crash_data$Date <- as.Date(crash_data$Date)

# Bucket every row into the first day of its month
tsla_data$Month <- floor_date(tsla_data$Date, "month")
crash_data$Month <- floor_date(crash_data$Date, "month")

# Ensure numeric values
tsla_data$`Close/Last` <- as.numeric(tsla_data$`Close/Last`)
crash_data$Deaths <- as.numeric(crash_data$Deaths)

# Average closing price per month
monthly_stock <- tsla_data %>%
  group_by(Month) %>%
  summarize(Average_Stock_Price = mean(`Close/Last`, na.rm = TRUE))

# Total deaths per month
monthly_deaths <- crash_data %>%
  group_by(Month) %>%
  summarize(Total_Deaths = sum(Deaths, na.rm = TRUE))

# Line graph: monthly average stock price with a dashed loess trend
stock_plot <- ggplot(monthly_stock, aes(x = Month, y = Average_Stock_Price)) +
  # `linewidth` replaces the `size` aesthetic for lines,
  # which is deprecated since ggplot2 3.4.0
  geom_line(color = "darkblue", linewidth = 1.2, linetype = "solid") +
  geom_point(color = "red", size = 3, shape = 19) +
  geom_smooth(
    method = "loess", se = FALSE,
    color = "orange", linetype = "dashed"
  ) +
  labs(
    title = "📈 Tesla Monthly Stock Prices",
    subtitle = "A visualization of Tesla stock trends",
    x = "Month",
    y = "Average Stock Price ($)"
  ) +
  theme_minimal() +
  # Documented date-scale arguments: a tick every two months, "Jan 2024" style
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y")

# Bar graph: total deaths per month, bars shaded yellow-to-red by height
deaths_plot <- ggplot(
  monthly_deaths,
  aes(x = Month, y = Total_Deaths, fill = Total_Deaths)
) +
  geom_col(color = "black", show.legend = FALSE) +
  scale_fill_gradient(low = "yellow", high = "red") +
  labs(
    title = "🚗 Tesla-Related Deaths Per Month",
    subtitle = "Crash fatalities involving Tesla vehicles",
    x = "Month",
    y = "Total Deaths"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y")

# Combine both plots side by side (patchwork's `+` on two ggplot objects)
stock_plot + deaths_plot
## `geom_smooth()` using formula = 'y ~ x'

# Load necessary libraries
library(ggplot2)
library(readxl)
library(dplyr)
library(lubridate)
library(scales)

# Load datasets
tsla_data <- read_excel("Tsla .xlsx")
crash_data <- read_excel("crash.xlsx")

# Convert Date column to Date format
tsla_data$Date <- as.Date(tsla_data$Date)
crash_data$Date <- as.Date(crash_data$Date)

# Bucket every row into the first day of its month
tsla_data$Month <- floor_date(tsla_data$Date, "month")
crash_data$Month <- floor_date(crash_data$Date, "month")

# Ensure numeric values
tsla_data$`Close/Last` <- as.numeric(tsla_data$`Close/Last`)
crash_data$Deaths <- as.numeric(crash_data$Deaths)

# Average closing price per month
monthly_stock <- tsla_data %>%
  group_by(Month) %>%
  summarize(Average_Stock_Price = mean(`Close/Last`, na.rm = TRUE))

# Total deaths per month
monthly_deaths <- crash_data %>%
  group_by(Month) %>%
  summarize(Total_Deaths = sum(Deaths, na.rm = TRUE))

# Merge by Month, keeping every month that has stock data. Months with no
# crash records come back as NA from the join; treat them as zero deaths so
# they are plotted as empty bars instead of being dropped with a warning.
combined_data <- left_join(monthly_stock, monthly_deaths, by = "Month") %>%
  mutate(Total_Deaths = coalesce(Total_Deaths, 0))

# Deaths are multiplied by this factor to share the stock-price axis; the
# secondary axis divides by the same factor, so both must use one constant.
deaths_axis_scale <- 50

# Combined graph: stock prices (line) and deaths (bars)
ggplot(combined_data, aes(x = Month)) +
  # Bars for deaths, rescaled onto the price axis
  geom_col(
    aes(y = Total_Deaths * deaths_axis_scale),
    fill = "red", alpha = 0.5
  ) +
  # Line and points for stock prices; `linewidth` replaces the `size`
  # aesthetic for lines, which is deprecated since ggplot2 3.4.0
  geom_line(aes(y = Average_Stock_Price), color = "blue", linewidth = 1.5) +
  geom_point(aes(y = Average_Stock_Price), color = "blue", size = 3) +
  # The y-axis titles are set on the y scale below, so no `y` label here
  labs(
    title = "📊 Tesla Stock Prices & Crash-Related Deaths",
    subtitle = "Comparing Tesla's market performance with accident data",
    x = "Month"
  ) +
  theme_minimal() +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y") +
  # Primary axis in dollars; secondary axis converts back to death counts
  scale_y_continuous(
    name = "Stock Price ($)",
    sec.axis = sec_axis(~ . / deaths_axis_scale, name = "Total Deaths")
  )

# Load required libraries
library(ggplot2)
library(readxl)
library(dplyr)
library(packcircles)

# Read the crash records, coerce Deaths to numeric and upper-case the model
# names so that differently-cased spellings fall into the same group
crash_data <- read_excel("crash.xlsx") %>%
  mutate(
    Deaths = as.numeric(Deaths),
    Model = toupper(Model)
  )

# Discard rows with a missing model or one of the placeholder values
excluded_models <- c("", "UNKNOWN", "-", "5")
clean_data <- crash_data %>%
  filter(!is.na(Model), !(Model %in% excluded_models))

# Sum the deaths recorded for each model
model_data <- clean_data %>%
  group_by(Model) %>%
  summarize(Total_Deaths = sum(Deaths, na.rm = TRUE))

# Lay the models out as packed circles whose area follows the death total,
# and attach the circle centres and radii to the per-model table
bubble_layout <- circleProgressiveLayout(
  model_data$Total_Deaths,
  sizetype = "area"
)
model_data <- cbind(model_data, bubble_layout)

# Polygon vertices used to draw each circle
bubble_data <- circleLayoutVertices(bubble_layout, npoints = 50)

# One rainbow colour per model
num_models <- nrow(model_data)
colors <- rainbow(num_models)

# Packed bubble chart, one labelled circle per model
ggplot() +
  geom_polygon(
    data = bubble_data,
    mapping = aes(x, y, group = id, fill = factor(id)),
    alpha = 0.7
  ) +
  geom_text(
    data = model_data,
    mapping = aes(x, y, label = Model),
    color = "black",
    size = 4
  ) +
  scale_fill_manual(values = colors) +
  coord_equal() +  # equal axis scaling keeps the circles round
  labs(
    fill = "Model",
    title = "Packed Bubble Chart: Tesla-Related Deaths by Model"
  ) +
  theme_void() +
  theme(legend.position = "none")  # legend hidden; circles carry labels

# Read the Tesla stock history
Tsla_ <- read_excel("Tsla .xlsx")

# Area chart of the closing price over time, filled on a viridis scale
ggplot(
  data = Tsla_,
  mapping = aes(x = Date, y = `Close/Last`, fill = `Close/Last`)
) +
  geom_area(color = "black", alpha = 0.5) +
  scale_fill_viridis_c() +
  ggtitle("Tesla Stock Price Trend") +
  xlab("Date") +
  ylab("Stock Price (EOD)") +
  theme_minimal()

# Violin plot of the closing-price distribution, one violin per calendar year
ggplot(
  data = Tsla_,
  mapping = aes(
    x = factor(year(Date)),
    y = `Close/Last`,
    fill = factor(year(Date))
  )
) +
  geom_violin(alpha = 0.7) +
  scale_fill_viridis_d() +
  ggtitle("Tesla Stock Price Distribution Per Year") +
  xlab("Year") +
  ylab("Stock Price (EOD)") +
  theme_minimal()

# Load necessary libraries
library(ggplot2)
library(readxl)
library(dplyr)
library(lubridate)
library(scales)

# Load datasets
tsla_data <- read_excel("Tsla .xlsx")
crash_data <- read_excel("crash.xlsx")

# Convert Date column to Date format
tsla_data$Date <- as.Date(tsla_data$Date)
crash_data$Date <- as.Date(crash_data$Date)

# Bucket every row into the first day of its month
tsla_data$Month <- floor_date(tsla_data$Date, "month")
crash_data$Month <- floor_date(crash_data$Date, "month")

# Ensure numeric values
tsla_data$`Close/Last` <- as.numeric(tsla_data$`Close/Last`)
crash_data$Deaths <- as.numeric(crash_data$Deaths)

# Average closing price per month
monthly_stock <- tsla_data %>%
  group_by(Month) %>%
  summarize(Average_Stock_Price = mean(`Close/Last`, na.rm = TRUE))

# Total deaths per month
monthly_deaths <- crash_data %>%
  group_by(Month) %>%
  summarize(Total_Deaths = sum(Deaths, na.rm = TRUE))

# Merge by Month, keeping every month that has stock data. Months with no
# crash records come back as NA from the join; treat them as zero deaths so
# they are plotted as empty bars instead of being dropped with a warning.
combined_data <- left_join(monthly_stock, monthly_deaths, by = "Month") %>%
  mutate(Total_Deaths = coalesce(Total_Deaths, 0))

# Scaling factor that stretches the tallest deaths bar to the highest price.
# Fall back to 1 when there are no deaths (or no usable values) so the factor
# is never Inf/NaN, which would break both the bars and the secondary axis.
max_price <- max(combined_data$Average_Stock_Price, na.rm = TRUE)
max_deaths <- max(combined_data$Total_Deaths, na.rm = TRUE)
scaling_factor <- if (
  is.finite(max_price) && is.finite(max_deaths) && max_deaths > 0
) {
  max_price / max_deaths
} else {
  1
}

# Combined graph: stock prices (line) and deaths (bars)
ggplot(combined_data, aes(x = Month)) +
  # Bars for deaths, rescaled onto the price axis
  geom_col(
    aes(y = Total_Deaths * scaling_factor),
    fill = "red", alpha = 0.5
  ) +
  # Line and points for stock prices; `linewidth` replaces the `size`
  # aesthetic for lines, which is deprecated since ggplot2 3.4.0
  geom_line(aes(y = Average_Stock_Price), color = "blue", linewidth = 1.5) +
  geom_point(aes(y = Average_Stock_Price), color = "blue", size = 3) +
  # The y-axis titles are set on the y scale below, so no `y` label here
  labs(
    title = "📊 Tesla Stock Prices & Crash-Related Deaths",
    subtitle = "Comparing Tesla's market performance with accident data",
    x = "Month"
  ) +
  theme_minimal() +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y") +
  # Primary axis in dollars; secondary axis converts back to death counts
  scale_y_continuous(
    name = "Stock Price ($)",
    sec.axis = sec_axis(~ . / scaling_factor, name = "Total Deaths")
  )