https://www.auto.swiss/#statistics
library(tidyverse)
library(lubridate)
library(scales)
library(ggrepel)
library(tidyquant)
library(jsonlite)
theme_set(theme_minimal())
invisible(Sys.setlocale("LC_TIME", "en_US.UTF-8"))
library(readxl)
files <- list.files("swisscars")
# Replace German month abbreviations with English equivalents
convert_to_english <- function(date_string) {
str_replace_all(date_string, c(
"Jan" = "Jan", "Feb" = "Feb", "Mrz" = "Mar",
"Apr" = "Apr", "Mai" = "May", "Jun" = "Jun",
"Jul" = "Jul", "Aug" = "Aug", "Sep" = "Sep",
"Okt" = "Oct", "Nov" = "Nov", "Dez" = "Dec",
"Januar" = "Jan", "Februar" = "Feb", "März" = "Mar",
"April" = "Apr", "Mai" = "May", "Juni" = "Jun",
"Juli" = "Jul", "August" = "Aug", "September" = "Sep",
"Oktober" = "Oct", "November" = "Nov", "Dezember" = "Dec",
"Sept" = "Sep", "Febr" = "Feb", "Mär" = "Mar"
))
}
cardata <- tibble()
for (file in files) {
sheets <- readxl::excel_sheets(paste0("swisscars/", file))
for (sheet in sheets) {
out <- readxl::read_excel(paste0("swisscars/", file), sheet = sheet, skip = 15) |>
select(1:3) |>
janitor::clean_names() |>
# file for example ModellePW2013.xls we need to extract the year from the file name
mutate(year = str_extract(file, "\\d{4}") |> as.integer(),
month = convert_to_english(sheet),
date = ymd(paste(year, month, "01"), locale = "de_CH.UTF-8"))
cardata <- bind_rows(cardata, out)
}
}
cardata <- cardata |>
rename(marke = marke_1, modell = modell_2,
anz = anzahl_3) |>
filter(marke != "Total",
!is.na(anz)) |>
mutate(marke = ifelse(marke == "Volkswagen", "VW", marke)) |>
arrange(date, marke, modell) |>
group_by(year, marke, modell) |>
mutate(a = case_when(row_number() == 1 ~ anz,
T ~ anz - lag(anz))) |>
ungroup()
cardata <- cardata |>
mutate(anz = a) |>
select(-a)
cardata |>
group_by(marke, year) |>
summarise(anz = sum(anz)) |>
group_by(year) |>
mutate(rank = rank(-anz)) |>
mutate(marke1 = ifelse(rank <= 5, marke, "Andere")) |>
group_by(year, marke1) |>
summarise(anz = sum(anz)) |>
# filter(year == 2016)
ggplot(aes(x = year, y = anz, fill = reorder(marke1, anz))) +
geom_col() +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = seq(2000, 2050, 1), expand = c(0,0)) +
# theme(legend.position = "none") +
scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999")) +
labs(title = "Top 5 Automarken in der Schweiz",
subtitle = "Neuzulassungen",
x = NULL,
y = "Anzahl",
fill = "Marke")
cardata |>
group_by(marke, year) |>
summarise(anz = sum(anz)) |>
group_by(year) |>
mutate(anteil = anz / sum(anz)) |>
filter(marke %in% c("VW", "BMW", "Mercedes-Benz", "Audi")) |>
ggplot(aes(x = year, y = anteil, color = marke)) +
geom_line() +
scale_y_continuous(labels = scales::percent, breaks = seq(0, 1, 0.01)) +
scale_x_continuous(breaks = seq(2000, 2050, 1), expand = c(0,0)) +
scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73")) +
labs(title = "Marktanteil pro Jahr",
subtitle = "Neuzulassungen",
x = NULL,
y = "Anteil",
color = "Marke")
cardata |>
group_by(marke, date) |>
summarise(anz = sum(anz)) |>
group_by(date) |>
mutate(anteil = anz / sum(anz)) |>
filter(marke %in% c("VW", "BMW", "Mercedes-Benz", "Audi")) |>
ggplot(aes(x = date, y = anteil, color = marke)) +
geom_line() +
scale_y_continuous(labels = scales::percent) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73")) +
labs(title = "Marktanteil pro Monat",
subtitle = "Neuzulassungen",
x = NULL,
y = "Anteil",
color = "Marke")
VW, Mercedes und Audi haben stake jährliche Zyklen, BMW auch aber schwächer. Was kann der Grund sein?
cardata |>
group_by(marke, date) |>
summarise(anz = sum(anz)) |>
group_by(date) |>
mutate(anteil = anz / sum(anz)) |>
filter(marke %in% c("VW", "BMW", "Mercedes-Benz", "Audi")) |>
ggplot(aes(x = date, y = anteil, color = marke)) +
geom_line() +
scale_y_continuous(labels = scales::percent) +
scale_x_date(date_breaks = "2 year", date_labels = "%Y") +
scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73")) +
labs(title = "Marktanteil pro Monat",
subtitle = "Neuzulassungen",
x = NULL,
y = "Anteil",
color = "Marke") +
theme(legend.position = "none") +
facet_wrap(~marke)
cardata |>
filter(year < 2024) |>
group_by(marke, date, month) |>
summarise(anz = sum(anz)) |>
group_by(date, month) |>
mutate(anteil = anz / sum(anz)) |>
filter(marke %in% c("VW", "BMW", "Mercedes-Benz", "Audi")) |>
group_by(marke, month) |>
summarise(anteil = mean(anteil)) |>
ungroup() |>
mutate(month = factor(month, levels = month.abb)) |>
ggplot(aes(x = month, y = anteil, fill = marke)) +
geom_col(position = "dodge") +
scale_y_continuous(labels = scales::percent) +
scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73")) +
facet_wrap(~marke) +
labs(title = "Marktanteil pro Monat",
subtitle = "Neuzulassungen",
x = NULL,
y = "Anteil",
fill = "Marke")
VWs stärkster Monat ist Dezember, ebenfalls Juli.
Mercedes ist am stärksten in Januar und September.
Audi Anfang Jahr am schwächsten, dann stetig steigend mit Höhepunkt im Dezember.
BMW wie erwähnt schwächerer Zyklus, aber im Januar am stärksten.
cardata |>
group_by(date) |>
summarise(anz = sum(anz)) |>
ggplot(aes(x = date, y = anz)) +
geom_line() +
scale_y_continuous(labels = scales::comma) +
# scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
labs(title = "Neuzulassungen pro Monat",
subtitle = "Alle Marken",
x = NULL,
y = "Anzahl")
cardata |>
filter(year < 2024) |>
group_by(month) |>
summarise(anz = sum(anz)) |>
mutate(month = factor(month, levels = month.abb)) |>
ggplot(aes(x = month, y = anz)) +
geom_col() +
scale_y_continuous(labels = scales::comma) +
labs(title = "Neuzulassungen pro Monat",
subtitle = "Alle Marken",
x = NULL,
y = "Anzahl")
Schwäche im August ist wohl durch Sommerferien zu erklären. Januar schwach weil auf Ende Jahr nicht viele neue Bestellungen. Dezember Aufholjagd.
cardata |>
filter(marke == "Tesla") |>
group_by(date, modell) |>
summarise(anz = sum(anz)) |>
ggplot(aes(x = date, y = anz, fill = modell)) +
geom_col() +
scale_y_continuous(labels = scales::comma) +
labs(title = "Neuzulassungen pro Monat",
subtitle = "Tesla",
x = NULL,
y = "Anzahl")
sumup_bottom <- function(df, n = 10, sum_col = "anz", div_name = "Diverse") {
df %>%
group_by(date) %>%
mutate(rank = row_number(desc(!!sym(sum_col)))) %>% # Rank within each group
mutate(modell = ifelse(rank > n, div_name, modell)) %>% # Rename to "Diverse" if rank > n
group_by(date, modell) %>%
summarise(anz = sum(!!sym(sum_col)), .groups = "drop") %>% # Summarize "Diverse" categories
arrange(date, desc(anz)) # Arrange for better readability
}
cardata |>
filter(marke == "Mercedes-Benz") |>
group_by(date, modell) |>
summarise(anz = sum(anz)) |>
sumup_bottom(n = 3) |>
ggplot(aes(x = date, y = anz, fill = modell)) +
geom_col() +
scale_y_continuous(labels = scales::comma) +
# we need 14 distingt colors
scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73",
"#F0E442", "#0072B2", "#D55E00", "#CC79A7",
"#999999", "#E69F00", "#56B4E9", "#009E73",
"#F0E442", "#0072B2")) +
labs(title = "Neuzulassungen pro Monat",
subtitle = "Mercedes",
x = NULL,
y = "Anzahl")