# Load necessary libraries
pacman::p_load(pacman, tidyverse, ggplot2, scales, readr, knitr, gridExtra, grid)

# Read the IEA Global EV dataset (CSV, looked up in the working directory)
IEA_Global_EV_Data_2024 <- readr::read_csv(
  file = "IEA Global EV Data 2024.csv"
)
## Rows: 12654 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): region, category, parameter, mode, powertrain, unit
## dbl (2): year, value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Define region groups
#
# region_mapping(region): map region names to coarser display groups.
#
# Arguments:
#   region  Character vector (or factor) of region names; any length,
#           including 0. A single string works exactly as before.
# Returns:
#   Character vector of the same length as `region` holding the group label
#   of each element. Names that are not in the table below, including NA,
#   map to NA_character_.
region_mapping <- function(region) {
  groups <- list(
    "North America" = c("USA", "Canada", "Mexico"),
    "South America" = c("Brazil", "Argentina", "Chile", "Colombia", "Peru"),
    "China" = "China",
    "UK" = c("UK", "United Kingdom"),
    "Europe" = c(
      "Germany", "France", "Italy", "Spain", "Netherlands", "Belgium",
      "Sweden", "Norway", "Finland", "Denmark", "Switzerland", "Austria",
      "Ireland", "Portugal", "Greece", "Luxembourg", "Poland",
      "Czech Republic", "Hungary", "Slovakia", "Slovenia"
    ),
    "Australia" = c("Australia", "New Zealand"),
    "Other" = c(
      "Bulgaria", "Costa Rica", "Croatia", "Cyprus", "Estonia", "Iceland",
      "India", "Indonesia", "Israel", "Japan", "Korea", "Latvia",
      "Lithuania", "Romania", "Seychelles", "South Africa", "Thailand",
      "Turkiye", "United Arab Emirates"
    ),
    "World" = "World"
  )

  # Flatten the table into two parallel vectors: member name -> group label.
  members <- unlist(groups, use.names = FALSE)
  labels <- rep(names(groups), times = lengths(groups))

  # match() is vectorised, never errors on NA or empty input, and yields NA
  # for unknown names; indexing with that NA gives NA_character_.
  labels[match(region, members)]
}

# Apply region groups to the dataset and filter out rows without a group.
# vapply() is used instead of sapply() so the new column is always a plain,
# unnamed character vector, even when the table has zero rows (sapply() would
# return a list there and would also attach every region string as a name).
# as.character() turns a logical NA returned for an unmapped region into
# NA_character_, so the declared character(1) result type always holds.
#
# `year` is coerced to numeric so it can be compared against a cut-off year
# later on; plots that need a discrete axis convert it to a factor themselves.
IEA_Global_EV_Data_2024 <- IEA_Global_EV_Data_2024 %>%
  mutate(
    Grouped_Region = vapply(
      region,
      function(r) as.character(region_mapping(r)),
      character(1),
      USE.NAMES = FALSE
    ),
    year = as.numeric(as.character(year))
  ) %>%
  filter(!is.na(Grouped_Region))

# Summary table: one row per group, with its member regions listed as a
# single comma-separated string
grouped_regions_summary <- IEA_Global_EV_Data_2024 %>%
  distinct(region, Grouped_Region) %>%
  group_by(Grouped_Region) %>%
  # distinct() above already left each region once per group
  summarise(Countries = toString(region)) %>%
  ungroup()

# Prepare data for pie chart: total EV sales per powertrain plus each
# powertrain's percentage of the overall total.
#
# Rows of the "World" group are left out: "World" is presumably the global
# aggregate, so summing it together with the individual countries would count
# the same sales twice and skew the shares.
# na.rm = TRUE keeps one missing value from turning a powertrain total, and
# with it every percentage, into NA.
ev_sales_pie_data <- IEA_Global_EV_Data_2024 %>%
  filter(parameter == "EV sales", Grouped_Region != "World") %>%
  group_by(powertrain) %>%
  summarize(total_sales = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(percentage = total_sales / sum(total_sales) * 100)

# Create plots
# Stacked bars of EV sales per year for four of the region groups. Bars for
# years after 2023 are flagged "Predicted" and drawn half-transparent.
# NOTE(review): "World" is stacked together with the other groups; confirm
# that this is intended.
ev_sales_stacked_plot <- IEA_Global_EV_Data_2024 %>%
  filter(
    parameter == "EV sales",
    Grouped_Region %in% c("China", "North America", "Other", "World")
  ) %>%
  mutate(
    # The flag is derived first, while `year` is still numeric
    prediction_flag = ifelse(year > 2023, "Predicted", "Actual"),
    year = as.factor(year)
  ) %>%
  ggplot(aes(x = year, y = value, fill = Grouped_Region)) +
  geom_col(aes(alpha = prediction_flag), position = "stack") +
  scale_fill_brewer(palette = "Set1") +
  scale_alpha_manual(
    values = c("Actual" = 1, "Predicted" = 0.5),
    guide = guide_legend(title = "Prediction")
  ) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "EV Sales Over Time by Grouped Region",
    x = "Year",
    y = "Number of EV Sales"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


# Dodged bars of EV sales share per region group, coloured by powertrain.
# The fill must come from the `powertrain` mapping only: a constant `fill`
# passed to the geom overrides that mapping, so the bars would be neither
# coloured nor dodged by powertrain, contradicting the plot title.
ev_sales_share_plot <- IEA_Global_EV_Data_2024 %>%
  filter(parameter == "EV sales share") %>%
  ggplot(aes(x = Grouped_Region, y = value, fill = powertrain)) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Set1") +  # Same palette as the other plots
  labs(
    title = "EV Sales Share by Grouped Region and Powertrain",
    x = "Grouped Region",
    y = "EV Sales Share (%)"
  ) +
  # Values are already percentages, so only append the "%" sign
  scale_y_continuous(labels = scales::percent_format(scale = 1)) +
  theme_minimal()


# Pie chart: a single stacked bar of total sales per powertrain, wrapped into
# polar coordinates, with each slice labelled by its rounded percentage
ev_sales_pie_chart <- ev_sales_pie_data %>%
  ggplot(aes(x = "", y = total_sales, fill = powertrain)) +
  geom_col(width = 1) +
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5),  # Centre the label in its slice
    size = 5
  ) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_brewer(palette = "Set1") +
  labs(title = "Worldwide Distribution of EV Sales by Powertrain Type") +
  theme_void()

# Render the region summary table as a grob, using small fonts for the body
# (5) and the column headers (6)
table_grob <- gridExtra::tableGrob(
  d = grouped_regions_summary,
  theme = gridExtra::ttheme_minimal(
    core = list(fg_params = list(fontsize = 5)),
    colhead = list(fg_params = list(fontsize = 6))
  )
)

# Bold 14-pt heading placed above the plot grid
title_grob <- grid::textGrob(
  label = "EV Sales Analysis: 2010-2024",
  gp = grid::gpar(fontsize = 14, fontface = "bold")
)

# Draw the final figure: the title on top, then a 2 x 2 grid holding the
# three plots and the summary table (filled row by row)
gridExtra::grid.arrange(
  title_grob,
  gridExtra::arrangeGrob(
    grobs = list(
      ev_sales_stacked_plot,
      ev_sales_share_plot,
      ev_sales_pie_chart,
      table_grob
    ),
    layout_matrix = matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE),
    nrow = 2,
    ncol = 2
  ),
  heights = c(0.1, 1)  # Thin strip for the title, the rest for the grid
)