library(tidyverse)
library(janitor)
library(scales)
library(stringr)
library(readr)
library(forcats)
library(plotly)
library(htmltools)
library(ggplot2)
library(maps)
library(scales)
# color palette
# Shared hex colours referenced by the manual colour/fill scales of the plots
# in this script.
bmw_blue <- "#0066B1" # BMW line, "Electrified share" line, BEV bars
bmw_red <- "#D62728" # Tesla line + annotation, "BEV share", "Needs improvement"
bmw_grey <- "#9E9E9E" # Mercedes-Benz line, PHEV bars
bmw_lgrey <- "#C0C0C0" # Volvo line
bmw_green <- "#2E8B57" # "Doing well" review signals
bmw_orange <- "#F28E2B" # NOTE(review): not referenced in the visible script
# Import datasets
# Every input is a CSV read relative to the working directory; the column-type
# message that readr normally prints is silenced for each file.
load_csv <- function(path) {
  read_csv(path, show_col_types = FALSE)
}

bigidea_raw <- load_csv("bmw_final_bigidea.csv")
board_raw <- load_csv("bmw_board_ev_story_final.csv")
comp_raw <- load_csv("competitors_global.csv")
reviews_raw <- load_csv("cars_descriptions_with_details.csv")
# clean datasets
# bigidea_df: snake_case column names, integer year, whitespace-normalised
# brand, numeric sales/share columns; rows without a usable year are dropped
# and the result is ordered chronologically.
bigidea_df <- bigidea_raw %>%
  clean_names() %>%
  mutate(
    year = as.integer(year),
    brand = str_squish(brand),
    across(
      c(global_sales, electrified_share_percent, bev_share_percent),
      as.numeric
    )
  ) %>%
  drop_na(year) %>%
  arrange(year)
# board_df: long-format metric table. Optional text columns have missing
# values replaced by "" before whitespace is normalised; metric and section
# are only whitespace-normalised. Rows lacking a year or a numeric value are
# removed.
board_df <- board_raw %>%
  clean_names() %>%
  mutate(
    year = as.integer(year),
    value = as.numeric(value),
    across(c(metric, section), str_squish),
    across(
      c(brand, region, market, unit, source, source_type, notes),
      function(txt) str_squish(coalesce(txt, ""))
    )
  ) %>%
  drop_na(year, value)
# comp_df: competitor deliveries with a whitespace-normalised brand, integer
# year and numeric global_sales; incomplete rows are dropped and the table is
# sorted by brand, then year.
comp_df <- comp_raw %>%
  clean_names() %>%
  mutate(
    year = as.integer(year),
    global_sales = as.numeric(global_sales),
    brand = str_squish(brand)
  ) %>%
  drop_na(year, global_sales) %>%
  arrange(brand, year)
# Normalise a free-text review field: missing -> "", lower-case, strip square
# brackets and single/double quotes, then collapse repeated whitespace.
tidy_review_text <- function(txt) {
  txt <- coalesce(txt, "")
  txt <- str_to_lower(txt)
  txt <- str_replace_all(txt, "\\[|\\]|'|\"", "")
  str_squish(txt)
}

# reviews_df: lower-cased brand, cleaned model name, integer manufacturing
# year, normalised strengths/weaknesses text and a numeric rating parsed from
# the rating column.
reviews_df <- reviews_raw %>%
  clean_names() %>%
  mutate(
    car_brand = str_squish(str_to_lower(car_brand)),
    car_model = str_squish(coalesce(car_model, "")),
    manufacturing_year = as.integer(manufacturing_year),
    strengths = tidy_review_text(strengths),
    weaknesses = tidy_review_text(weaknesses),
    rating_num = parse_number(as.character(rating))
  )
# Build plotting tables

# Deliveries of the four brands compared in plot 1.
premium_brands <- c("BMW", "Tesla", "Mercedes-Benz", "Volvo")
premium_competition <- filter(comp_df, brand %in% premium_brands)

# Long table (year, series, share) feeding plot 2: one row per year and share
# type, with display labels for the two series and missing shares removed.
share_labels <- c(
  electrified_share_percent = "Electrified share",
  bev_share_percent = "BEV share"
)
bmw_electrification_trend <- bigidea_df %>%
  select(year, electrified_share_percent, bev_share_percent) %>%
  pivot_longer(
    cols = c(electrified_share_percent, bev_share_percent),
    names_to = "series",
    values_to = "share"
  ) %>%
  mutate(series = recode(series, !!!share_labels)) %>%
  drop_na(share)

# BEV vs PHEV deliveries per year (year, powertrain, value) for plot 3.
powertrain_labels <- c(bev_deliveries = "BEV", phev_deliveries = "PHEV")
ev_composition <- board_df %>%
  filter(section == "electrification") %>%
  filter(metric %in% c("bev_deliveries", "phev_deliveries")) %>%
  mutate(powertrain = recode(metric, !!!powertrain_labels)) %>%
  select(year, powertrain, value)
# Regional deliveries for the most recent year that actually has regional
# rows, largest market first.
#
# The section/metric subset is taken BEFORE the latest year is determined.
# Evaluating max(year) inside the same filter() would use the latest year of
# the whole board table, which yields an empty result whenever that year has
# no regional_deliveries rows. slice_max() keeps every row tied for the
# latest year and returns zero rows (without a warning) on empty input.
regional_focus <- board_df %>%
  filter(
    section == "regional_sales",
    metric == "regional_deliveries"
  ) %>%
  slice_max(year, n = 1, with_ties = TRUE) %>%
  arrange(desc(value))
# Review signals for plot 5.
# Each BMW review is assigned to the FIRST keyword bucket whose pattern occurs
# in its strengths (or weaknesses) text, so the order of the case_when()
# branches matters. The five most frequent buckets per side are kept;
# weakness counts are negated so they extend to the left of zero.
bmw_reviews <- reviews_df %>%
  filter(car_brand == "bmw")

# Count reviews per category, keep the five largest and tag them with `label`.
top_five_categories <- function(tagged_reviews, label) {
  tagged_reviews %>%
    count(category, sort = TRUE) %>%
    slice_head(n = 5) %>%
    mutate(type = label)
}

bmw_strengths <- bmw_reviews %>%
  mutate(
    category = case_when(
      str_detect(strengths, "performance|power|acceleration|handling|drive|driving") ~ "Performance / driving",
      str_detect(strengths, "luxury|premium|interior|quality|cabin") ~ "Luxury / interior",
      str_detect(strengths, "technology|tech|infotainment|screen|features") ~ "Technology / features",
      str_detect(strengths, "design|style|looks|exterior") ~ "Design / styling",
      str_detect(strengths, "comfort|ride|seats") ~ "Comfort",
      str_detect(strengths, "space|cargo|practical|room") ~ "Space / practicality",
      TRUE ~ "Other strengths"
    )
  ) %>%
  top_five_categories("Doing well")

bmw_weaknesses <- bmw_reviews %>%
  mutate(
    category = case_when(
      str_detect(weaknesses, "price|expensive|cost|overpriced|value") ~ "High price / poor value",
      str_detect(weaknesses, "space|cargo|rear seat|back seat|headroom|legroom|trunk|practical") ~ "Space / practicality",
      str_detect(weaknesses, "ride|stiff|harsh|bumpy|firm|comfort") ~ "Ride comfort",
      str_detect(weaknesses, "technology|tech|screen|software|infotainment|interface|controls") ~ "Tech / infotainment",
      str_detect(weaknesses, "option|options|extra cost|pay extra|optional") ~ "Options cost extra",
      TRUE ~ "Other weaknesses"
    )
  ) %>%
  top_five_categories("Needs improvement") %>%
  mutate(n = -n)

# Stack both sides and order the category factor by the signed count.
review_signals <- bind_rows(bmw_strengths, bmw_weaknesses)
review_signals <- mutate(review_signals, category = fct_reorder(category, n))
# Plot 1 — Competition
# One line (plus points) per premium brand showing annual global deliveries,
# with a text annotation and arrow. `g1` is the static ggplot; `p1` is its
# plotly conversion using the custom hover text.
brand_colours <- c(
  "BMW" = bmw_blue,
  "Tesla" = bmw_red,
  "Mercedes-Benz" = bmw_grey,
  "Volvo" = bmw_lgrey
)

competition_theme <- theme_minimal(base_size = 16) +
  theme(
    plot.title = element_text(face = "bold", size = 24, margin = margin(b = 8)),
    plot.subtitle = element_text(size = 14, margin = margin(b = 14)),
    plot.caption = element_text(
      size = 11.5, color = "grey35", hjust = 0, margin = margin(t = 16)
    ),
    axis.title.y = element_text(size = 15),
    axis.text = element_text(size = 12.5),
    legend.title = element_blank(),
    legend.position = "top",
    legend.text = element_text(size = 13),
    panel.grid.minor = element_blank(),
    plot.margin = margin(20, 30, 25, 20)
  )

g1 <- premium_competition %>%
  ggplot(aes(
    x = year,
    y = global_sales,
    color = brand,
    group = brand,
    text = paste0("<b>", brand, "</b><br>Year: ", year, "<br>Deliveries: ", comma(global_sales))
  )) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 2.8) +
  scale_color_manual(values = brand_colours) +
  scale_y_continuous(
    labels = label_number(scale = 1e-6, suffix = "M"),
    expand = expansion(mult = c(0.02, 0.08))
  ) +
  scale_x_continuous(breaks = seq(2010, 2024, 2)) +
  labs(
    title = "BMW Holds Scale — But Tesla Is Changing the Growth Curve",
    subtitle = "BMW remains the premium volume leader, while Tesla’s EV-first model drives faster growth.",
    x = NULL,
    y = "Global deliveries",
    caption = "Source: BMW, Tesla, Mercedes-Benz, and Volvo compiled annual delivery datasets, 2010–2024."
  ) +
  # Hand-placed call-out: label plus an arrow.
  annotate(
    "text",
    x = 2021.2, y = 1.02e6,
    label = "Tesla's EV-led acceleration",
    color = bmw_red, size = 4.6, hjust = 0
  ) +
  annotate(
    "segment",
    x = 2021.0, xend = 2020.2,
    y = 9.6e5, yend = 5.2e5,
    color = bmw_red, linewidth = 0.7,
    arrow = arrow(length = unit(0.18, "cm"))
  ) +
  competition_theme

p1 <- g1 %>%
  ggplotly(tooltip = "text") %>%
  layout(showlegend = TRUE)
p1
# Plot 2 — Electrification shift
# Electrified vs BEV share of deliveries per year, one line per series with a
# tick for every year in the data. `g2` is the static plot, `p2` the plotly
# version with custom hover text.
trend_years <- range(bmw_electrification_trend$year)
add_percent_sign <- function(v) paste0(v, "%")

shift_theme <- theme_minimal(base_size = 16) +
  theme(
    plot.title = element_text(face = "bold", size = 24, margin = margin(b = 8)),
    plot.subtitle = element_text(size = 14, margin = margin(b = 14)),
    plot.caption = element_text(
      size = 11.5, color = "grey35", hjust = 0, margin = margin(t = 16)
    ),
    axis.title.y = element_text(size = 15),
    legend.title = element_blank(),
    legend.position = "top",
    panel.grid.minor = element_blank()
  )

g2 <- bmw_electrification_trend %>%
  ggplot(aes(
    x = year,
    y = share,
    color = series,
    group = series,
    text = paste0(series, "<br>Year: ", year, "<br>Share: ", round(share, 1), "%")
  )) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 2.6) +
  scale_color_manual(
    values = c("Electrified share" = bmw_blue, "BEV share" = bmw_red)
  ) +
  scale_y_continuous(labels = add_percent_sign) +
  scale_x_continuous(breaks = seq(trend_years[1], trend_years[2], 1)) +
  labs(
    title = "BMW’s Electrification Is Rising — But BEVs Still Lag",
    subtitle = "Electrified vehicles are growing, but fully electric vehicles remain a smaller share of deliveries.",
    x = NULL,
    y = "Share of deliveries",
    caption = "Source: BMW electrification metrics compiled from project datasets."
  ) +
  shift_theme

p2 <- g2 %>% ggplotly(tooltip = "text")
p2
# Plot 3 — EV composition
# Stacked columns of BEV and PHEV deliveries per year (year is treated as a
# discrete axis). `g3` is the static plot, `p3` the plotly version.
composition_theme <- theme_minimal(base_size = 16) +
  theme(
    plot.title = element_text(face = "bold", size = 24, margin = margin(b = 8)),
    plot.subtitle = element_text(size = 14, margin = margin(b = 14)),
    plot.caption = element_text(
      size = 11.5, color = "grey35", hjust = 0, margin = margin(t = 16)
    ),
    axis.title.y = element_text(size = 15),
    legend.title = element_blank(),
    legend.position = "top",
    panel.grid.minor = element_blank()
  )

g3 <- ev_composition %>%
  ggplot(aes(
    x = factor(year),
    y = value,
    fill = powertrain,
    text = paste0(powertrain, "<br>Year: ", year, "<br>Deliveries: ", comma(value))
  )) +
  geom_col(width = 0.75) +
  scale_fill_manual(values = c("BEV" = bmw_blue, "PHEV" = bmw_grey)) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "BMW’s EV Growth Is Split Between BEVs and Hybrids",
    subtitle = "Plug-in hybrids still contribute meaningfully, showing that the transition is not yet fully BEV-led.",
    x = NULL,
    y = "Deliveries",
    caption = "Source: BMW electrification metrics compiled from project datasets."
  ) +
  composition_theme

p3 <- g3 %>% ggplotly(tooltip = "text")
p3
# Plot 4 - Regional markets distribution
# World choropleth: countries belonging to one of six highlighted markets are
# shaded by that market's value, every other country is drawn in light grey.

# Hard-coded value per highlighted market.
# NOTE(review): these figures are typed in by hand and are not derived from
# the loaded datasets - confirm their origin.
market_values <- tibble(
  region_name = c("Europe", "USA", "China", "India", "Americas", "Asia"),
  value = c(3.05, 2.98, 2.95, 2.92, 2.90, 2.93) * 1e6
)

# Country -> market lookup.
# map_data("world") splits the maps database names ("Country:Sub-area") into
# `region` (country) and `subregion` (e.g. "Alaska", "Great Britain").
# `subregion` never contains macro-regions such as "Western Europe" or
# "South America", so markets have to be assigned from the country name.
# Spellings follow the maps "world" database; names absent from the installed
# version simply do not match. Verify with unique(world_map$region).
europe_countries <- c(
  "UK", "Germany", "France", "Italy", "Spain", "Portugal", "Ireland",
  "Norway", "Sweden", "Finland", "Denmark", "Netherlands", "Belgium",
  "Switzerland", "Austria", "Poland", "Czech Republic", "Hungary",
  "Romania", "Greece", "Ukraine", "Russia",
  "Luxembourg", "Iceland", "Slovakia", "Slovenia", "Croatia", "Serbia",
  "Bulgaria", "Estonia", "Latvia", "Lithuania", "Belarus", "Moldova",
  "Albania", "Montenegro", "Kosovo", "Bosnia and Herzegovina",
  "North Macedonia", "Malta", "Cyprus"
)
americas_countries <- c(
  "Canada", "Mexico", "Guatemala", "Belize", "Honduras", "El Salvador",
  "Nicaragua", "Costa Rica", "Panama", "Cuba", "Haiti",
  "Dominican Republic", "Jamaica", "Puerto Rico", "Bahamas", "Trinidad",
  "Tobago", "Colombia", "Venezuela", "Guyana", "Suriname", "French Guiana",
  "Ecuador", "Peru", "Brazil", "Bolivia", "Paraguay", "Uruguay",
  "Argentina", "Chile"
)
asia_countries <- c(
  "Japan", "South Korea", "North Korea", "Mongolia", "Taiwan", "Pakistan",
  "Bangladesh", "Nepal", "Bhutan", "Sri Lanka", "Afghanistan", "Myanmar",
  "Thailand", "Laos", "Cambodia", "Vietnam", "Malaysia", "Singapore",
  "Indonesia", "Philippines", "Brunei", "Timor-Leste", "Kazakhstan",
  "Uzbekistan", "Turkmenistan", "Kyrgyzstan", "Tajikistan", "Turkey",
  "Iran", "Iraq", "Syria", "Lebanon", "Israel", "Jordan", "Saudi Arabia",
  "Kuwait", "Bahrain", "Qatar", "United Arab Emirates", "Oman", "Yemen"
)

value_label <- label_number(scale = 1e-6, suffix = "M", accuracy = 0.01)

world_map <- map_data("world")
world_heat <- world_map %>%
  mutate(
    # USA, China and India are their own markets; they are not part of the
    # Americas / Asia lists above.
    region_name = case_when(
      region %in% europe_countries ~ "Europe",
      region == "USA" ~ "USA",
      region == "China" ~ "China",
      region == "India" ~ "India",
      region %in% americas_countries ~ "Americas",
      region %in% asia_countries ~ "Asia",
      TRUE ~ NA_character_
    )
  ) %>%
  # Countries outside the six markets have no match in market_values and
  # therefore get value = NA from the join itself.
  left_join(market_values, by = "region_name") %>%
  mutate(
    # Hover text used by the plotly version of the map.
    tooltip = if_else(
      is.na(value),
      region,
      paste0(
        "<b>", region, "</b><br>Market: ", region_name,
        "<br>Value: ", value_label(value)
      )
    )
  )

g4 <- ggplot(
  world_heat,
  aes(long, lat, group = group, fill = value, text = tooltip)
) +
  geom_polygon(color = "white", linewidth = 0.18) +
  coord_quickmap(
    xlim = c(-170, 180),
    ylim = c(-58, 85),
    expand = FALSE
  ) +
  scale_fill_gradient(
    low = "#E3F0E3",
    high = "#4C9A53",
    labels = label_number(scale = 1e-6, suffix = "M", accuracy = 0.1),
    na.value = "grey92",
    name = "EV-friendly\nindex"
  ) +
  labs(
    title = "Eco-friendly markets are leading BMW’s electrification opportunity",
    subtitle = "Europe leads, followed by the U.S., China, and India in markets most aligned with EV transition.",
    x = NULL,
    y = NULL,
    caption = "Source: Compiled project dataset."
  ) +
  theme_minimal(base_size = 18) +
  theme(
    plot.title = element_text(face = "bold", size = 28, hjust = 0, margin = margin(b = 2)),
    plot.subtitle = element_text(size = 16, hjust = 0, margin = margin(b = 12)),
    plot.caption = element_text(size = 15, color = "grey35", hjust = 1, margin = margin(t = 12)),
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.title = element_text(size = 14, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 12),
    plot.margin = margin(10, 10, 10, 10)
  ) +
  guides(
    fill = guide_colorbar(
      title.position = "top",
      title.hjust = 0.5,
      barwidth = unit(11, "cm"),
      barheight = unit(0.7, "cm")
    )
  )
g4

# The `text` aesthetic mapped above is what tooltip = "text" displays.
# ggplotly() warns here that plotly.js does not (yet) support horizontal
# legend items (https://github.com/plotly/plotly.js/issues/53).
p4 <- ggplotly(g4, tooltip = "text")
# Plot 5 — Review signals
# Diverging lollipop chart: strengths extend right of zero, weaknesses (stored
# as negative counts) extend left; the x axis shows absolute values and point
# size scales with the absolute count. `g5` is static, `p5` is interactive.
signals_theme <- theme_minimal(base_size = 16) +
  theme(
    plot.title = element_text(face = "bold", size = 24, margin = margin(b = 8)),
    plot.subtitle = element_text(size = 14, margin = margin(b = 14)),
    plot.caption = element_text(
      size = 11.5, color = "grey35", hjust = 0, margin = margin(t = 16)
    ),
    legend.title = element_blank(),
    legend.position = "top",
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank()
  )

g5 <- review_signals %>%
  ggplot(aes(
    x = n,
    y = category,
    color = type,
    text = paste0(type, "<br>", category, "<br>Count: ", abs(n))
  )) +
  # Stem from zero to the signed count, then the sized head.
  geom_segment(
    aes(x = 0, xend = n, y = category, yend = category),
    linewidth = 1.2
  ) +
  geom_point(aes(size = abs(n)), show.legend = FALSE) +
  scale_color_manual(
    values = c("Doing well" = bmw_green, "Needs improvement" = bmw_red)
  ) +
  scale_size_continuous(range = c(3, 10)) +
  scale_x_continuous(labels = abs, expand = expansion(mult = c(0.15, 0.15))) +
  labs(
    title = "BMW Wins on Driving and Brand Feel — But Value Concerns Persist",
    subtitle = "Customer review signals suggest BMW should protect performance equity while improving value, practicality, and comfort.",
    x = "Count of review mentions",
    y = NULL,
    caption = "Source: BMW subset of cars_descriptions_with_details.csv."
  ) +
  signals_theme

p5 <- g5 %>% ggplotly(tooltip = "text")
p5
# Write each static ggplot to a 14 x 8 inch, 300 dpi PNG with a white
# background (files land in the working directory).
save_png <- function(file, plot) {
  ggsave(
    filename = file,
    plot = plot,
    width = 14,
    height = 8,
    dpi = 300,
    bg = "white"
  )
}

save_png("plot1_bmw_vs_competitors.png", g1)
save_png("plot2_electrification_shift.png", g2)
save_png("plot3_ev_composition.png", g3)
save_png("bmw_world_heatmap_2024_green.png", g4)
save_png("plot5_review_signals.png", g5)
# Assemble the five interactive charts into one centred HTML page (heading,
# short intro, then the charts stacked vertically) and mark it browsable so
# it renders as HTML when printed.
chart_gap <- "margin-bottom:40px;"

report_body <- tags$div(
  style = "max-width:1300px;margin:auto;font-family:Arial;",
  tags$h2(
    "BMW Electrification Transition — Draft for Peer Feedback",
    style = "margin-bottom:6px;"
  ),
  tags$p(
    "Five-plot draft narrative: market position, electrification shift, EV composition, regional focus, and customer action.",
    style = "margin-top:0;color:#555;"
  ),
  tags$div(style = chart_gap, p1),
  tags$div(style = chart_gap, p2),
  tags$div(style = chart_gap, p3),
  tags$div(style = chart_gap, p4),
  # The last chart uses a smaller bottom margin.
  tags$div(style = "margin-bottom:20px;", p5)
)

browsable(tagList(report_body))
# Five-plot draft narrative: market position, electrification shift, EV composition, regional focus, and customer action.
# Export the review-signals chart (g5) as a 16 x 9 inch PDF.
#
# * The plot is passed explicitly. Without `plot =`, ggsave() falls back to
#   last_plot(), which happened to be g5 but silently changes if another plot
#   is built later in the script.
#   NOTE(review): if this file is meant to hold all five charts, a multi-page
#   device is needed instead - confirm the intent.
# * The title contains an em dash (U+2014). The default pdf() device cannot
#   encode it and substitutes "-" with an 'mbcsToSbcs' warning; cairo_pdf
#   handles UTF-8 text, so it is used whenever this R build supports cairo.
pdf_device <- if (capabilities("cairo")) cairo_pdf else "pdf"
ggsave(
  "final_project.pdf",
  plot = g5,
  device = pdf_device,
  width = 16,
  height = 9
)