library(tidyverse)
library(janitor)
library(scales)
library(stringr)
library(readr)
library(forcats)
library(plotly)
library(htmltools)
library(ggplot2)
library(maps)
library(scales)

# Shared colour constants (hex codes) referenced by the charts below
bmw_blue <- "#0066B1"   # BMW line, electrified share, BEV bars
bmw_red <- "#D62728"    # Tesla line, BEV share, "Needs improvement"
bmw_grey <- "#9E9E9E"   # Mercedes-Benz line, PHEV bars
bmw_lgrey <- "#C0C0C0"  # Volvo line
bmw_green <- "#2E8B57"  # "Doing well" review signals
bmw_orange <- "#F28E2B" # defined for the palette; not referenced in this script

# Load the four raw CSV inputs from the working directory.
# Column-type messages are silenced for every file.
bigidea_raw <- "bmw_final_bigidea.csv" %>%
  read_csv(show_col_types = FALSE)
board_raw <- "bmw_board_ev_story_final.csv" %>%
  read_csv(show_col_types = FALSE)
comp_raw <- "competitors_global.csv" %>%
  read_csv(show_col_types = FALSE)
reviews_raw <- "cars_descriptions_with_details.csv" %>%
  read_csv(show_col_types = FALSE)
# Clean datasets ----
# BMW yearly summary: snake_case column names, integer year, numeric measures,
# rows without a usable year removed, sorted chronologically.
bigidea_df <- bigidea_raw %>%
  clean_names() %>%
  mutate(
    year = as.integer(year),
    brand = str_squish(brand),
    across(
      c(global_sales, electrified_share_percent, bev_share_percent),
      as.numeric
    )
  ) %>%
  drop_na(year) %>%
  arrange(year)

# Long-format board metrics: optional text fields have NA replaced by "" and
# whitespace squished, year/value are coerced, and rows lacking either a year
# or a value are dropped.
board_df <- board_raw %>%
  clean_names() %>%
  mutate(
    year = as.integer(year),
    value = as.numeric(value),
    # Text fields where a missing entry becomes an empty string
    across(
      c(brand, region, market, unit, source, source_type, notes),
      ~ str_squish(coalesce(.x, ""))
    ),
    # Key fields are squished only; NA is left as NA
    across(c(metric, section), str_squish)
  ) %>%
  drop_na(year, value)

# Competitor deliveries: tidy brand labels, coerce year and sales, keep only
# complete year/sales rows, ordered by brand and then year.
comp_df <- comp_raw %>%
  clean_names() %>%
  mutate(
    across(brand, str_squish),
    across(year, as.integer),
    across(global_sales, as.numeric)
  ) %>%
  drop_na(year, global_sales) %>%
  arrange(brand, year)

# Review text: lower-cased brand, NA-safe model, integer model year, and a
# numeric rating parsed from whatever text the rating column holds.
# Strengths and weaknesses get identical cleaning: NA becomes "", text is
# lower-cased, list punctuation ([ ] ' ") is stripped, whitespace is squished.
reviews_df <- reviews_raw %>%
  clean_names() %>%
  mutate(
    car_brand = str_squish(str_to_lower(car_brand)),
    car_model = str_squish(coalesce(car_model, "")),
    manufacturing_year = as.integer(manufacturing_year),
    across(
      c(strengths, weaknesses),
      ~ str_squish(
        str_replace_all(str_to_lower(coalesce(.x, "")), "\\[|\\]|'|\"", "")
      )
    ),
    rating_num = parse_number(as.character(rating))
  )

# Build plotting tables ----

# Deliveries for the four premium brands compared in plot 1
premium_competition <- filter(
  comp_df,
  brand %in% c("BMW", "Tesla", "Mercedes-Benz", "Volvo")
)

# One row per year and share series for plot 2. The two share columns are
# given their display labels on selection, then stacked; missing shares are
# dropped while pivoting.
bmw_electrification_trend <- bigidea_df %>%
  select(
    year,
    `Electrified share` = electrified_share_percent,
    `BEV share` = bev_share_percent
  ) %>%
  pivot_longer(
    cols = -year,
    names_to = "series",
    values_to = "share",
    values_drop_na = TRUE
  )

# BEV and PHEV delivery rows from the electrification section, for plot 3
ev_composition <- board_df %>%
  filter(
    section == "electrification",
    metric %in% c("bev_deliveries", "phev_deliveries")
  ) %>%
  transmute(
    year,
    # Only the two metrics above survive the filter, so a two-way label suffices
    powertrain = if_else(metric == "bev_deliveries", "BEV", "PHEV"),
    value
  )

# Regional deliveries for the most recent year that has regional data,
# largest market first.
regional_focus <- board_df %>%
  filter(
    section == "regional_sales",
    metric == "regional_deliveries"
  ) %>%
  # Pick the latest year *among the regional rows*. Evaluating max(year) in the
  # same filter() as the section/metric tests would use the latest year of the
  # whole table and return nothing when that year has no regional rows.
  slice_max(year, n = 1, with_ties = TRUE) %>%
  arrange(desc(value))

# BMW-only reviews (car_brand was lower-cased during cleaning)
bmw_reviews <- filter(reviews_df, car_brand == "bmw")

# Each review is assigned to the first strength theme whose keywords appear in
# its strengths text; the five most frequent themes are kept.
bmw_strengths <- bmw_reviews %>%
  count(
    category = case_when(
      str_detect(strengths, "performance|power|acceleration|handling|drive|driving") ~ "Performance / driving",
      str_detect(strengths, "luxury|premium|interior|quality|cabin") ~ "Luxury / interior",
      str_detect(strengths, "technology|tech|infotainment|screen|features") ~ "Technology / features",
      str_detect(strengths, "design|style|looks|exterior") ~ "Design / styling",
      str_detect(strengths, "comfort|ride|seats") ~ "Comfort",
      str_detect(strengths, "space|cargo|practical|room") ~ "Space / practicality",
      TRUE ~ "Other strengths"
    ),
    sort = TRUE
  ) %>%
  head(5) %>%
  mutate(type = "Doing well")

# Same approach for weaknesses; counts are negated so they plot to the left
# of zero in the diverging chart.
bmw_weaknesses <- bmw_reviews %>%
  count(
    category = case_when(
      str_detect(weaknesses, "price|expensive|cost|overpriced|value") ~ "High price / poor value",
      str_detect(weaknesses, "space|cargo|rear seat|back seat|headroom|legroom|trunk|practical") ~ "Space / practicality",
      str_detect(weaknesses, "ride|stiff|harsh|bumpy|firm|comfort") ~ "Ride comfort",
      str_detect(weaknesses, "technology|tech|screen|software|infotainment|interface|controls") ~ "Tech / infotainment",
      str_detect(weaknesses, "option|options|extra cost|pay extra|optional") ~ "Options cost extra",
      TRUE ~ "Other weaknesses"
    ),
    sort = TRUE
  ) %>%
  head(5) %>%
  mutate(n = -n, type = "Needs improvement")

# Stack both sides and order the category factor by signed count
review_signals <- bind_rows(bmw_strengths, bmw_weaknesses)
review_signals$category <- with(review_signals, fct_reorder(category, n))
# Plot 1 — Competition
# Line chart of global deliveries per brand; the hover text is precomputed as
# a column so the aesthetic mapping stays short.
g1 <- premium_competition %>%
  mutate(
    hover = paste0("<b>", brand, "</b><br>Year: ", year, "<br>Deliveries: ", comma(global_sales))
  ) %>%
  ggplot(aes(x = year, y = global_sales, colour = brand, group = brand, text = hover)) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 2.8) +
  # Call-out label and arrow pointing at Tesla's growth phase
  annotate(
    "text",
    x = 2021.2, y = 1.02e6,
    label = "Tesla's EV-led acceleration",
    color = bmw_red, size = 4.6, hjust = 0
  ) +
  annotate(
    "segment",
    x = 2021.0, xend = 2020.2,
    y = 9.6e5, yend = 5.2e5,
    color = bmw_red, linewidth = 0.7,
    arrow = arrow(length = unit(0.18, "cm"))
  ) +
  scale_colour_manual(
    values = c(
      "BMW" = bmw_blue,
      "Tesla" = bmw_red,
      "Mercedes-Benz" = bmw_grey,
      "Volvo" = bmw_lgrey
    )
  ) +
  scale_x_continuous(breaks = seq(from = 2010, to = 2024, by = 2)) +
  scale_y_continuous(
    labels = label_number(scale = 1e-6, suffix = "M"),
    expand = expansion(mult = c(0.02, 0.08))
  ) +
  labs(
    x = NULL,
    y = "Global deliveries",
    title = "BMW Holds Scale — But Tesla Is Changing the Growth Curve",
    subtitle = "BMW remains the premium volume leader, while Tesla’s EV-first model drives faster growth.",
    caption = "Source: BMW, Tesla, Mercedes-Benz, and Volvo compiled annual delivery datasets, 2010–2024."
  ) +
  theme_minimal(base_size = 16) +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    legend.text = element_text(size = 13),
    axis.title.y = element_text(size = 15),
    axis.text = element_text(size = 12.5),
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold", size = 24, margin = margin(b = 8)),
    plot.subtitle = element_text(size = 14, margin = margin(b = 14)),
    plot.caption = element_text(size = 11.5, color = "grey35", hjust = 0, margin = margin(t = 16)),
    plot.margin = margin(20, 30, 25, 20)
  )

# Interactive version; only the custom hover text is shown
p1 <- layout(ggplotly(g1, tooltip = "text"), showlegend = TRUE)
p1
# Plot 2 — Electrification shift
# Electrified vs. BEV share of deliveries over time, one line per series.
g2 <- bmw_electrification_trend %>%
  mutate(
    hover = paste0(series, "<br>Year: ", year, "<br>Share: ", round(share, 1), "%")
  ) %>%
  ggplot(aes(x = year, y = share, colour = series, group = series, text = hover)) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 2.6) +
  scale_colour_manual(
    values = c(
      "Electrified share" = bmw_blue,
      "BEV share" = bmw_red
    )
  ) +
  # One tick for every year covered by the data
  scale_x_continuous(
    breaks = seq(
      from = min(bmw_electrification_trend$year),
      to = max(bmw_electrification_trend$year),
      by = 1
    )
  ) +
  scale_y_continuous(labels = function(pct) paste0(pct, "%")) +
  labs(
    x = NULL,
    y = "Share of deliveries",
    title = "BMW’s Electrification Is Rising — But BEVs Still Lag",
    subtitle = "Electrified vehicles are growing, but fully electric vehicles remain a smaller share of deliveries.",
    caption = "Source: BMW electrification metrics compiled from project datasets."
  ) +
  theme_minimal(base_size = 16) +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    axis.title.y = element_text(size = 15),
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold", size = 24, margin = margin(b = 8)),
    plot.subtitle = element_text(size = 14, margin = margin(b = 14)),
    plot.caption = element_text(size = 11.5, color = "grey35", hjust = 0, margin = margin(t = 16))
  )

p2 <- ggplotly(g2, tooltip = "text")
p2
# Plot 3 — EV composition
# Stacked columns of BEV and PHEV deliveries per year (year shown as discrete).
g3 <- ev_composition %>%
  mutate(
    hover = paste0(powertrain, "<br>Year: ", year, "<br>Deliveries: ", comma(value))
  ) %>%
  ggplot(aes(x = factor(year), y = value, fill = powertrain, text = hover)) +
  geom_col(width = 0.75) +
  scale_fill_manual(values = c("BEV" = bmw_blue, "PHEV" = bmw_grey)) +
  scale_y_continuous(labels = comma) +
  labs(
    x = NULL,
    y = "Deliveries",
    title = "BMW’s EV Growth Is Split Between BEVs and Hybrids",
    subtitle = "Plug-in hybrids still contribute meaningfully, showing that the transition is not yet fully BEV-led.",
    caption = "Source: BMW electrification metrics compiled from project datasets."
  ) +
  theme_minimal(base_size = 16) +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    axis.title.y = element_text(size = 15),
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold", size = 24, margin = margin(b = 8)),
    plot.subtitle = element_text(size = 14, margin = margin(b = 14)),
    plot.caption = element_text(size = 11.5, color = "grey35", hjust = 0, margin = margin(t = 16))
  )

p3 <- ggplotly(g3, tooltip = "text")
p3
# Plot 4 - Regional markets distribution
# Hard-coded value per market group, stored in units (millions * 1e6)
market_values <- tibble(
  region_name = c("Europe", "USA", "China", "India", "Americas", "Asia"),
  value = c(3.05, 2.98, 2.95, 2.92, 2.90, 2.93) * 1e6
)

world_map <- map_data("world")

# Country-name lookups used to assign each polygon to a market group.
# map_data("world") identifies countries in `region`; its `subregion` column
# normally holds island/territory names rather than continental groupings, so
# grouping has to be driven by country name. Names that are not present in the
# map data simply match nothing.
europe_countries <- c(
  "UK", "Germany", "France", "Italy", "Spain", "Portugal", "Ireland",
  "Norway", "Sweden", "Finland", "Denmark", "Netherlands", "Belgium",
  "Switzerland", "Austria", "Poland", "Czech Republic", "Hungary",
  "Romania", "Greece", "Ukraine", "Russia"
)
americas_countries <- c(
  "Canada", "Mexico", "Guatemala", "Belize", "Honduras", "El Salvador",
  "Nicaragua", "Costa Rica", "Panama", "Cuba", "Jamaica", "Haiti",
  "Dominican Republic", "Puerto Rico", "Bahamas", "Colombia", "Venezuela",
  "Guyana", "Suriname", "French Guiana", "Ecuador", "Peru", "Brazil",
  "Bolivia", "Paraguay", "Chile", "Argentina", "Uruguay"
)
asia_countries <- c(
  "Japan", "South Korea", "North Korea", "Mongolia", "Taiwan", "Pakistan",
  "Bangladesh", "Nepal", "Bhutan", "Sri Lanka", "Myanmar", "Thailand",
  "Laos", "Cambodia", "Vietnam", "Malaysia", "Singapore", "Indonesia",
  "Philippines", "Brunei", "Kazakhstan", "Uzbekistan", "Turkmenistan",
  "Kyrgyzstan", "Tajikistan", "Afghanistan", "Iran", "Iraq", "Saudi Arabia",
  "United Arab Emirates", "Qatar", "Kuwait", "Bahrain", "Oman", "Yemen",
  "Jordan", "Israel", "Lebanon", "Syria", "Turkey"
)

world_heat <- world_map %>%
  mutate(
    # First match wins: Europe, then the three single-country markets, then the
    # broader Americas/Asia groups. The original subregion tests are kept
    # alongside the country lists so any polygon they matched still matches.
    region_name = case_when(
      subregion %in% c("Western Europe", "Eastern Europe") |
        region %in% europe_countries ~ "Europe",
      region == "USA" ~ "USA",
      region == "China" ~ "China",
      region == "India" ~ "India",
      subregion %in% c("Northern America", "Central America", "South America", "Caribbean") |
        region %in% americas_countries ~ "Americas",
      subregion %in% c("East Asia", "South Asia", "Southeast Asia", "Central Asia", "Middle East") |
        region %in% asia_countries ~ "Asia",
      TRUE ~ NA_character_
    )
  ) %>%
  # Unassigned polygons get value = NA from the join and are drawn with the
  # fill scale's na.value, so no further masking step is needed.
  left_join(market_values, by = "region_name")

# World choropleth: polygons filled by the market-group value, grey when the
# country belongs to no group. A `text` aesthetic is mapped so that
# ggplotly(..., tooltip = "text") below has hover content to show; it has no
# effect on the static plot.
g4 <- ggplot(
  world_heat,
  aes(
    long, lat,
    group = group,
    fill = value,
    text = if_else(
      is.na(value),
      region,
      paste0(
        "<b>", region, "</b><br>Market group: ", region_name,
        "<br>EV-friendly index: ",
        number(value, scale = 1e-6, suffix = "M", accuracy = 0.1)
      )
    )
  )
) +
  geom_polygon(color = "white", linewidth = 0.18) +
  coord_quickmap(
    xlim = c(-170, 180),
    ylim = c(-58, 85),
    expand = FALSE
  ) +
  scale_fill_gradient(
    low = "#E3F0E3",
    high = "#4C9A53",
    labels = label_number(scale = 1e-6, suffix = "M", accuracy = 0.1),
    na.value = "grey92",
    name = "EV-friendly\nindex"
  ) +
  labs(
    title = "Eco-friendly markets are leading BMW’s electrification opportunity",
    subtitle = "Europe leads, followed by the U.S., China, and India in markets most aligned with EV transition.",
    x = NULL,
    y = NULL,
    caption = "Source: Compiled project dataset."
  ) +
  theme_minimal(base_size = 18) +
  theme(
    plot.title = element_text(face = "bold", size = 28, hjust = 0, margin = margin(b = 2)),
    plot.subtitle = element_text(size = 16, hjust = 0, margin = margin(b = 12)),
    plot.caption = element_text(size = 15, color = "grey35", hjust = 1, margin = margin(t = 12)),
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.title = element_text(size = 14, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 12),
    plot.margin = margin(10, 10, 10, 10)
  ) +
  guides(
    fill = guide_colorbar(
      title.position = "top",
      title.hjust = 0.5,
      barwidth = unit(11, "cm"),
      barheight = unit(0.7, "cm")
    )
  )

g4

# Interactive version. The horizontal legend requested in the theme is not
# honoured by plotly, which emits the warning echoed below.
p4 <- ggplotly(g4, tooltip = "text")
## Warning: plotly.js does not (yet) support horizontal legend items 
## You can track progress here: 
## https://github.com/plotly/plotly.js/issues/53
# Plot 5 — Review signals
# Diverging lollipop chart: strengths extend right of zero, weaknesses left
# (their counts were negated upstream). Axis labels and hover show magnitudes.
g5 <- review_signals %>%
  mutate(
    hover = paste0(type, "<br>", category, "<br>Count: ", abs(n))
  ) %>%
  ggplot(aes(x = n, y = category, colour = type, text = hover)) +
  geom_segment(
    aes(x = 0, xend = n, y = category, yend = category),
    linewidth = 1.2
  ) +
  geom_point(aes(size = abs(n)), show.legend = FALSE) +
  scale_colour_manual(
    values = c("Doing well" = bmw_green, "Needs improvement" = bmw_red)
  ) +
  scale_size_continuous(range = c(3, 10)) +
  scale_x_continuous(
    labels = abs,
    expand = expansion(mult = c(0.15, 0.15))
  ) +
  labs(
    x = "Count of review mentions",
    y = NULL,
    title = "BMW Wins on Driving and Brand Feel — But Value Concerns Persist",
    subtitle = "Customer review signals suggest BMW should protect performance equity while improving value, practicality, and comfort.",
    caption = "Source: BMW subset of cars_descriptions_with_details.csv."
  ) +
  theme_minimal(base_size = 16) +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    plot.title = element_text(face = "bold", size = 24, margin = margin(b = 8)),
    plot.subtitle = element_text(size = 14, margin = margin(b = 14)),
    plot.caption = element_text(size = 11.5, color = "grey35", hjust = 0, margin = margin(t = 16))
  )

p5 <- ggplotly(g5, tooltip = "text")
p5
# Write each static chart to a 14 x 8 in, 300 dpi PNG on a white background.
# The list names are the output file names.
list(
  "plot1_bmw_vs_competitors.png" = g1,
  "plot2_electrification_shift.png" = g2,
  "plot3_ev_composition.png" = g3,
  "bmw_world_heatmap_2024_green.png" = g4,
  "plot5_review_signals.png" = g5
) %>%
  iwalk(function(chart, path) {
    ggsave(filename = path, plot = chart, width = 14, height = 8, dpi = 300, bg = "white")
  })

# Assemble the five interactive charts into one browsable HTML page:
# heading, one-line summary, then each widget in its own spaced container.
browsable(tagList(
  tags$div(
    style = "max-width:1300px;margin:auto;font-family:Arial;",
    tags$h2(
      style = "margin-bottom:6px;",
      "BMW Electrification Transition — Draft for Peer Feedback"
    ),
    tags$p(
      style = "margin-top:0;color:#555;",
      "Five-plot draft narrative: market position, electrification shift, EV composition, regional focus, and customer action."
    ),
    # The first four charts share the larger bottom margin
    lapply(
      list(p1, p2, p3, p4),
      function(widget) tags$div(style = "margin-bottom:40px;", widget)
    ),
    tags$div(style = "margin-bottom:20px;", p5)
  )
))

## BMW Electrification Transition — Draft for Peer Feedback
##
## Five-plot draft narrative: market position, electrification shift, EV composition, regional focus, and customer action.

# Export the review-signals chart (g5) as a 16 x 9 in PDF.
# The plot is named explicitly rather than relying on last_plot(), and the
# Cairo PDF device is used because the default pdf device cannot encode the
# em dash in the title and substitutes a hyphen for it.
ggsave(
  "final_project.pdf",
  plot = g5,
  device = cairo_pdf,
  width = 16,
  height = 9
)