Data

https://www.auto.swiss/#statistics

library(tidyverse)
library(lubridate)
library(scales)
library(ggrepel)
library(tidyquant)
library(jsonlite)
theme_set(theme_minimal())
invisible(Sys.setlocale("LC_TIME", "en_US.UTF-8"))

library(readxl)
files <- list.files("swisscars")

# Replace German month abbreviations with English equivalents
convert_to_english <- function(date_string) {
  str_replace_all(date_string, c(
    "Jan" = "Jan", "Feb" = "Feb", "Mrz" = "Mar", 
    "Apr" = "Apr", "Mai" = "May", "Jun" = "Jun",
    "Jul" = "Jul", "Aug" = "Aug", "Sep" = "Sep",
    "Okt" = "Oct", "Nov" = "Nov", "Dez" = "Dec",
    "Januar" = "Jan", "Februar" = "Feb", "März" = "Mar",
    "April" = "Apr", "Mai" = "May", "Juni" = "Jun",
    "Juli" = "Jul", "August" = "Aug", "September" = "Sep",
    "Oktober" = "Oct", "November" = "Nov", "Dezember" = "Dec",
    "Sept" = "Sep", "Febr" = "Feb", "Mär" = "Mar"
  ))
}

cardata <- tibble()
for (file in files) {
  sheets <- readxl::excel_sheets(paste0("swisscars/", file))
  for (sheet in sheets) {
    out <- readxl::read_excel(paste0("swisscars/", file), sheet = sheet, skip = 15) |> 
      select(1:3) |> 
      janitor::clean_names() |> 
      # file for example ModellePW2013.xls we need to extract the year from the file name
      mutate(year = str_extract(file, "\\d{4}") |> as.integer(),
             month = convert_to_english(sheet),
             date = ymd(paste(year, month, "01"), locale = "de_CH.UTF-8"))
    
    cardata <- bind_rows(cardata, out)
  }
}

cardata <- cardata |> 
  rename(marke = marke_1, modell = modell_2,
         anz = anzahl_3) |> 
  filter(marke != "Total",
         !is.na(anz)) |> 
  mutate(marke = ifelse(marke == "Volkswagen", "VW", marke)) |> 
  arrange(date, marke, modell) |> 
  group_by(year, marke, modell) |> 
  mutate(a = case_when(row_number() == 1 ~ anz,
                       T ~ anz - lag(anz))) |> 
  ungroup()

cardata <- cardata |> 
  mutate(anz = a) |> 
  select(-a)

Manufacturers

cardata |> 
  group_by(marke, year) |>
  summarise(anz = sum(anz)) |> 
  group_by(year) |> 
  mutate(rank = rank(-anz)) |> 
  mutate(marke1 = ifelse(rank <= 5, marke, "Andere")) |> 
  group_by(year, marke1) |>
  summarise(anz = sum(anz)) |>
  # filter(year == 2016)
  ggplot(aes(x = year, y = anz, fill = reorder(marke1, anz))) +
  geom_col() +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = seq(2000, 2050, 1), expand = c(0,0)) +
  # theme(legend.position = "none") +
  scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999")) +
  labs(title = "Top 5 Automarken in der Schweiz",
       subtitle = "Neuzulassungen",
       x = NULL,
       y = "Anzahl",
       fill = "Marke")

VW, BMW, Mercedes, Audi

cardata |> 
  group_by(marke, year) |>
  summarise(anz = sum(anz)) |>
  group_by(year) |>
  mutate(anteil = anz / sum(anz)) |> 
  filter(marke %in% c("VW", "BMW", "Mercedes-Benz", "Audi")) |>
  ggplot(aes(x = year, y = anteil, color = marke)) +
  geom_line() +
  scale_y_continuous(labels = scales::percent, breaks = seq(0, 1, 0.01)) +
  scale_x_continuous(breaks = seq(2000, 2050, 1), expand = c(0,0)) +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73")) +
  labs(title = "Marktanteil pro Jahr",
       subtitle = "Neuzulassungen",
       x = NULL,
       y = "Anteil",
       color = "Marke")

cardata |> 
  group_by(marke, date) |>
  summarise(anz = sum(anz)) |>
  group_by(date) |>
  mutate(anteil = anz / sum(anz)) |> 
  filter(marke %in% c("VW", "BMW", "Mercedes-Benz", "Audi")) |>
  ggplot(aes(x = date, y = anteil, color = marke)) +
  geom_line() +
  scale_y_continuous(labels = scales::percent) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73")) +
  labs(title = "Marktanteil pro Monat",
       subtitle = "Neuzulassungen",
       x = NULL,
       y = "Anteil",
       color = "Marke")

VW, Mercedes und Audi haben stake jährliche Zyklen, BMW auch aber schwächer. Was kann der Grund sein?

cardata |> 
  group_by(marke, date) |>
  summarise(anz = sum(anz)) |>
  group_by(date) |>
  mutate(anteil = anz / sum(anz)) |> 
  filter(marke %in% c("VW", "BMW", "Mercedes-Benz", "Audi")) |>
  ggplot(aes(x = date, y = anteil, color = marke)) +
  geom_line() +
  scale_y_continuous(labels = scales::percent) +
  scale_x_date(date_breaks = "2 year", date_labels = "%Y") +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73")) +
  labs(title = "Marktanteil pro Monat",
       subtitle = "Neuzulassungen",
       x = NULL,
       y = "Anteil",
       color = "Marke") +
  theme(legend.position = "none") +
  facet_wrap(~marke)

cardata |> 
  filter(year < 2024) |> 
  group_by(marke, date, month) |>
  summarise(anz = sum(anz)) |>
  group_by(date, month) |>
  mutate(anteil = anz / sum(anz)) |> 
  filter(marke %in% c("VW", "BMW", "Mercedes-Benz", "Audi")) |> 
  group_by(marke, month) |> 
  summarise(anteil = mean(anteil)) |> 
  ungroup() |> 
  mutate(month = factor(month, levels = month.abb)) |>
  ggplot(aes(x = month, y = anteil, fill = marke)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73")) +
  facet_wrap(~marke) +
  labs(title = "Marktanteil pro Monat",
       subtitle = "Neuzulassungen",
       x = NULL,
       y = "Anteil",
       fill = "Marke")

VWs stärkster Monat ist Dezember, ebenfalls Juli.

Mercedes ist am stärksten in Januar und September.

Audi Anfang Jahr am schwächsten, dann stetig steigend mit Höhepunkt im Dezember.

BMW wie erwähnt schwächerer Zyklus, aber im Januar am stärksten.

Neuzulassungen insgesamt

cardata |> 
  group_by(date) |>
  summarise(anz = sum(anz)) |>
  ggplot(aes(x = date, y = anz)) +
  geom_line() +
  scale_y_continuous(labels = scales::comma) +
  # scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(title = "Neuzulassungen pro Monat",
       subtitle = "Alle Marken",
       x = NULL,
       y = "Anzahl")

cardata |> 
  filter(year < 2024) |>
  group_by(month) |>
  summarise(anz = sum(anz)) |>
  mutate(month = factor(month, levels = month.abb)) |>
  ggplot(aes(x = month, y = anz)) +
  geom_col() +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Neuzulassungen pro Monat",
       subtitle = "Alle Marken",
       x = NULL,
       y = "Anzahl")

Schwäche im August ist wohl durch Sommerferien zu erklären. Januar schwach weil auf Ende Jahr nicht viele neue Bestellungen. Dezember Aufholjagd.

Tesla

cardata |> 
  filter(marke == "Tesla") |> 
  group_by(date, modell) |>
  summarise(anz = sum(anz)) |>
  ggplot(aes(x = date, y = anz, fill = modell)) +
  geom_col() +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Neuzulassungen pro Monat",
       subtitle = "Tesla",
       x = NULL,
       y = "Anzahl")

Mercedes

sumup_bottom <- function(df, n = 10, sum_col = "anz", div_name = "Diverse") {
  df %>%
    group_by(date) %>% 
    mutate(rank = row_number(desc(!!sym(sum_col)))) %>% # Rank within each group
    mutate(modell = ifelse(rank > n, div_name, modell)) %>% # Rename to "Diverse" if rank > n
    group_by(date, modell) %>%
    summarise(anz = sum(!!sym(sum_col)), .groups = "drop") %>% # Summarize "Diverse" categories
    arrange(date, desc(anz)) # Arrange for better readability
}

cardata |> 
  filter(marke == "Mercedes-Benz") |> 
  group_by(date, modell) |>
  summarise(anz = sum(anz)) |>
  sumup_bottom(n = 3) |> 
  ggplot(aes(x = date, y = anz, fill = modell)) +
  geom_col() +
  scale_y_continuous(labels = scales::comma) +
  # we need 14 distingt colors
  scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73", 
                               "#F0E442", "#0072B2", "#D55E00", "#CC79A7", 
                               "#999999", "#E69F00", "#56B4E9", "#009E73", 
                               "#F0E442", "#0072B2")) +
  labs(title = "Neuzulassungen pro Monat",
       subtitle = "Mercedes",
       x = NULL,
       y = "Anzahl")