It’s no secret that Germany is known for its automotive industry. Cars are one of the country’s most important export and the German automotive industry employs hundreds of thousands of people. Interestingly, German car buyers exhibit a strong preference for cars build in their home region. Volkswagen (VW) cars, which are produced in the northern state of Lower Saxony, are vastly popular in the German northwest. Conversely, BMWs, which are produced in the vicinity of Munich, are relatively popular in Bavaria. Even more interesting is the case of the east of Germany, but more on that later. Below is a screenshot of a handy shiny application that visualizes where different car makers have their regional strongholds in the German automobile market. As with my earlier Coronavirus dashboard, you can click on the image to be redirected to the actual dashboard.
The data for this project come from the German Kraftfahrt-Bundesamt (KBA), which is Germany’s federal motor transport authority. The raw data comprise two excel files. The first details all new vehicle registrations in 2019. The second one summarizes all existing vehicle registrations. Let’s import and clean these data:
rm(list = ls())
# library
library(readxl)
library(tidyverse)
'%!in%' <- function(x,y)!('%in%'(x,y))
# import excel files
df1 <- read_excel("data/raw_total.xlsx", sheet="FZ 2.3", range = "C8:V89", na="-",
col_types = c("text","skip", rep("numeric",18)))
head(df1) # all registrations
## # A tibble: 6 x 19
## `Hersteller\r\n` `Baden-\r\nWürt… Bayern Berlin `Branden-\r\nbu… Bremen
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Herstellerbezei… NA NA NA NA NA
## 2 ALPINA 1019 1378 119 63 22
## 3 ASTON MARTIN (G… 611 957 184 37 22
## 4 AUDI (D) 472525 695564 63142 75614 15027
## 5 AUDI (H) 19554 23844 1712 1299 447
## 6 AUTOMOB-EISENAC… 119 149 225 1504 10
## # … with 13 more variables: Hamburg <dbl>, Hessen <dbl>,
## # `Mecklenburg-\r\nVorpommern` <dbl>, `Nieder-\r\nsachsen` <dbl>,
## # `Nordrhein-\r\nWestfalen` <dbl>, `Rheinland-\r\nPfalz` <dbl>,
## # Saarland <dbl>, Sachsen <dbl>, `Sachsen-\r\nAnhalt` <dbl>,
## # `Schleswig-\r\nHolstein` <dbl>, Thüringen <dbl>, Sonstige <dbl>,
## # Deutschland <dbl>
df2 <- read_excel("data/raw_new.xlsx", sheet="FZ 4.3", range = "C8:V73", na="-",
col_types = c("text","skip", rep("numeric",18)))
head(df2) # new registrations in 2019
## # A tibble: 6 x 19
## Hersteller `Baden-\r\nWürt… Bayern Berlin `Branden-\r\nbu… Bremen Hamburg
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Herstelle… NA NA NA NA NA NA
## 2 ALPINA 95 124 16 4 1 16
## 3 ASTON MAR… 62 87 21 3 1 28
## 4 AUDI (D) 43606 90363 4634 2449 979 12285
## 5 AUDI (H) 700 1762 33 13 4 43
## 6 BEIJING B… 48 2 NA 2 3 NA
## # … with 12 more variables: Hessen <dbl>, `Mecklenburg-\r\nVorpommern` <dbl>,
## # `Nieder-\r\nsachsen` <dbl>, `Nordrhein-\r\nWestfalen` <dbl>,
## # `Rheinland-\r\nPfalz` <dbl>, Saarland <dbl>, Sachsen <dbl>,
## # `Sachsen-\r\nAnhalt` <dbl>, `Schleswig-\r\nHolstein` <dbl>,
## # Thüringen <dbl>, Sonstige <dbl>, Deutschland <dbl>
# remove empty row
df1 <- df1[-1,]
df2 <- df2[-1,]
# simpler variable names
names(df1) <- c("make","BW","BY","BE","BB","HB","HH","HE","MV","NI","NW",
"RP","SL","SN","ST","SH","TH","other","total")
names(df2) <- names(df1)
# remove country codes from brand names
df1$make <- gsub("\\s*\\([^\\)]+\\)","",df1$make)
df2$make <- gsub("\\s*\\([^\\)]+\\)","",df2$make)
# replace NA entries with zeros
df1[is.na(df1)] <- 0
df2[is.na(df2)] <- 0
# simplify brand names: total registrations
df1$make[df1$make == "AUTOMOB-EISENACH-AWE"] <- "AWE"
df1$make[df1$make == "DAIMLER"] <- "MERCEDES"
df1$make[df1$make == "MERCEDES-BENZ"] <- "MERCEDES"
df1$make[df1$make == "FCA"] <- "FCA (FIAT)"
df1$make[df1$make == "GENERAL MOT-GMC"] <- "GENERAL MOTORS"
df1$make[df1$make == "HONDA MOTOR"] <- "HONDA"
df1$make[df1$make == "HYUNDAI MOTOR"] <- "HYUNDAI"
df1$make[df1$make == "KIA MOTOR"] <- "KIA"
df1$make[df1$make == "KIA MOTORS"] <- "KIA"
df1$make[df1$make == "MAGYAR SUZUKI"] <- "SUZUKI"
df1$make[df1$make == "NISSAN EUROPE"] <- "NISSAN"
df1$make[df1$make == "PSA AUTOMOBILES"] <- "PSA"
df1$make[df1$make == "TOYOTA EUROPE"] <- "TOYOTA"
df1$make[df1$make == "SAAB,-SCANIA"] <- "SAAB-SCANIA"
df1$make[df1$make == "SUBARU-FUJI HEAVY"] <- "SUBARU"
df1$make[df1$make == "VAZ-LADA"] <- "LADA"
df1$make[df1$make == "VOLKSWAGEN"] <- "VW"
df1$make[df1$make == "SONSTIGE HERSTELLER"] <- "OTHER"
# simplify brand names: new registrations
df2$make[df2$make == "DAIMLER"] <- "MERCEDES"
df2$make[df2$make == "FCA"] <- "FCA (FIAT)"
df2$make[df2$make == "HONDA MOTOR"] <- "HONDA"
df2$make[df2$make == "HYUNDAI MOTOR"] <- "HYUNDAI"
df2$make[df2$make == "KIA MOTOR"] <- "KIA"
df2$make[df2$make == "KIA MOTORS"] <- "KIA"
df2$make[df2$make == "MAGYAR SUZUKI"] <- "SUZUKI"
df2$make[df2$make == "MAGYAR SUZUKI"] <- "SUZUKI"
df2$make[df2$make == "MAN TRUCK & BUS"] <- "MAN"
df2$make[df2$make == "PSA AUTOMOBILES"] <- "PSA"
df2$make[df2$make == "RENAULT TRUCKS"] <- "RENAULT"
df2$make[df2$make == "SUBARU-FUJI HEAVY"] <- "SUBARU"
df2$make[df2$make == "VAZ-LADA"] <- "LADA"
df2$make[df2$make == "TOYOTA EUROPE"] <- "TOYOTA"
df2$make[df2$make == "VOLKSWAGEN"] <- "VW"
df2$make[df2$make == "VOLKSWAGEN-VWOA"] <- "VW"
df2$make[df2$make == "SONSTIGE HERSTELLER"] <- "OTHER"
# aggregate over different subsidiaries of the same brand
df1 <- aggregate(df1 %>% select(-make), by=list(df1$make), FUN=sum)
df2 <- aggregate(df2 %>% select(-make), by=list(df2$make), FUN=sum)
names(df1)[1] <- "make"
names(df2)[1] <- "make"
Now, we are left with two clean dataframe that summarize how many cars of each brand are registered in which state of Germany or have been registered there in 2019.
df1[1:10,1:10]
## make BW BY BE BB HB HH HE MV NI
## 1 ALPINA 1019 1378 119 63 22 149 564 27 492
## 2 ASTON MARTIN 611 957 184 37 22 245 541 19 255
## 3 AUDI 492079 719408 64854 76913 15474 51887 260929 46020 309182
## 4 AWE 119 149 225 1504 10 12 64 633 105
## 5 BENTLEY 524 879 308 50 52 232 402 25 328
## 6 BMW 555720 923854 86721 68830 22506 71182 318755 36536 281867
## 7 CAMI 623 533 49 90 7 30 352 39 312
## 8 CHEVROLET 15624 24303 6511 9310 1651 5040 12205 5432 17171
## 9 CHRYSLER 930 1037 264 297 33 159 543 117 722
## 10 CITROEN 112410 84511 21872 29378 5312 12988 56351 15980 74759
df2[1:10,1:10]
## make BW BY BE BB HB HH HE MV NI
## 1 ALPINA 95 124 16 4 1 16 65 2 42
## 2 ASTON MARTIN 62 87 21 3 1 28 52 2 15
## 3 AUDI 44306 92125 4667 2462 983 12328 17766 1633 13768
## 4 BEIJING BORGWARD 48 2 0 2 3 0 2 0 11
## 5 BENTLEY 91 139 41 4 7 100 79 5 84
## 6 BMW 39272 130558 6429 2920 1726 10024 25089 1712 14660
## 7 BRABUS 0 1 2 0 0 1 0 0 3
## 8 CHRYSLER 2 5 0 0 0 0 0 0 18
## 9 CITROEN 6800 7420 1042 1488 311 2101 6734 629 3967
## 10 CONCORDE 7 8 1 3 0 0 2 0 3
We notice that not all brands, e.g., AWE or BEIJING BORGWARD, are present in both dataframes. This is because some brands (like AWE) have not been registered in 2019, or others (like BEIJING BORGWARD) have just entered the market. To keep things simple, we will subset our data and focus on some of the more prominent car brands. Moreover, we will store the two dataframes in a single array which we export in the form of a .RDS file.
Now, we can develop a dashboard to visualize these data. The geospatial data for the German states come from the Global Administrative Areas (GADM) database. The steps to create the dashbaord are pretty much the same as with the earlier Coronavirus dashboard, so I will not walk you through them in a great level of detail. If you are unfamiliar with dashboards/shiny objects created in R, just check out my earlier tutorial on the Coronavirus dashboard.
In our automobile brands dashboards, we will have a map that shows where a brand has what market share within Germany. Note that we will use the same color spectrum for each brand. So don’t confuse Volkswagen’s enormous market shares with those of Porsche, just because they are displayed in the same color. The colors will really only tell in which states a particular brand is important relative to how (un-)important it is in other states. To understand a brand’s overall importance, we will include a little pie chart that illustrates the brand’s overall market share in the German automobile market. Lastly, a simple bar chart will display the five highest market shares this brand has in the different states of Germany. On the left, the user will be able to select which brand he or she is interested in and whether the dashboard should display the data for all registered vehicles or only those that have been registered in 2019.
rm(list = ls())
# library
library(sp)
library(tidyverse)
library(leaflet)
library(shiny)
library(shinydashboard)
# import data
shape <- readRDS("data/shapefile.rds") # shape file
regis <- readRDS("data/registrations.RDS") # registration data
brandnames <- dimnames(regis)[[1]]
# compute percentages
regis_pct <- regis
sums1 <- apply(regis,c(2,3),sum)[,"total"]
sums2 <- apply(regis,c(2,3),sum)[,"new"]
for(i in 1:dim(regis)[[1]]) regis_pct[i,,"total"] <- regis[i,,"total"] / sums1 * 100
for(i in 1:dim(regis)[[1]]) regis_pct[i,,"new"] <- regis[i,,"new"] / sums2 * 100
rm(regis,sums1,sums2,i)
# add percentages data to shape file
shape1 <- shape
shape2 <- shape
for(i in dimnames(regis_pct)[[1]]) shape1@data[,i] <- regis_pct[i,1:16,"total"]
for(i in dimnames(regis_pct)[[1]]) shape2@data[,i] <- regis_pct[i,1:16,"new"]
rm(shape,i)
ui <- dashboardPage(
# layout
skin = "red",
# header
dashboardHeader(
titleWidth = 250,
title = "Car brands in Germany"
),
# sidebar
dashboardSidebar(
width = 250,
radioButtons("make","Brand:",brandnames),
radioButtons("stat","Statistic:",c("New registrations 2019"="total",
"Total registrations"="new"))
),
# body
dashboardBody(
tags$style(type = "text/css", "#map {height: calc(100vh - 80px) !important;}"),
box(title = "Regional Distribution",
leafletOutput("map")),
box(title = "Market share total",
plotOutput("pie")),
box(title = "Highest market shares",
plotOutput("bar"))
)
)
server <- function(input, output, session){
# get inputs
stat <- reactive(input$stat)
make <- reactive(input$make)
# create outputs
observe({
# select correct data for map
if(stat()=="total") shape <- shape1
if(stat()=="new") shape <- shape2
# draw map
map <- leaflet(shape) %>%
addPolygons(color = "#444444", weight = 1, smoothFactor = 0.5,
opacity = 1.0, fillOpacity = 0.5,
fillColor = ~colorQuantile("YlOrRd",shape@data[,make()])(shape@data[,make()]),
highlightOptions = highlightOptions(color = "black", weight = 2,
bringToFront = TRUE))
# send map to output
output$map <- renderLeaflet(map)
# draw pie chart
x <- regis_pct[,"total",stat()]
y <- sum(x) - x[make()]
x <- c(x[make()],y)
names(x) <- c(make(),"Others")
# send pie chart to output
output$pie <- renderPlot({
par(mar = rep(0,4))
pie(x,
clockwise = T,
init.angle = 90,
labels = paste(names(x),": ",round(x,2),"%",sep=""),
col = c("tomato","snow2"))
})
# bar chart
z <- regis_pct[make(),1:16,stat()]
output$bar <- renderPlot({
barplot(sort(z, decreasing = T)[1:5],
ylab="Market share in %", xlab="")
})
})
}
# run shiny
shinyApp(ui, server, options=list(launch.browser=T))