1 Berlin Voter’s Map

On 18th September 2016, Berlin voted in the state election. Using the library “leaflet”, the results of the state election are visualised by voting area.

library(leaflet)
library(maptools)
library(dplyr)
library(sp)
library(rgdal)
library(caret)
# Read the "UWB" layer of the Berlin voting-area shapefile.
berlin_shape <- readOGR(
  dsn = "../data/geo/Berlin_shape/UWB.shp",
  layer = "UWB",
  encoding = "utf-8",
  verbose = FALSE
)

# Reproject the polygons to EPSG:4326 (WGS 84 longitude/latitude), the
# coordinate system used for the leaflet map further down.
wgs84 <- CRS("+init=epsg:4326")
berlin_map <- spTransform(berlin_shape, wgs84)

# Derive the winning party of every voting area: columns 18 to 43 of result1
# are compared row by row, and the name of the column holding the row maximum
# (the first one on ties, as given by which.max) becomes the Winner.
# NOTE(review): presumably columns 18:43 are the per-party vote counts -
# confirm against the code that builds result1.
party_votes <- result1[18:43]
z <- apply(party_votes, 1, which.max)
result1$Winner <- as.factor(names(party_votes)[z])

# Colour palette for the winning parties.
# The colours are assigned to the factor levels of result1$Winner by position,
# so they must stay in level order: AfD, CDU, FDP, GRÜNE, LINKE, SPD.
party_colours <- c(
  "#00ADEF", # AfD
  "#000000", # CDU
  "#FFED00", # FDP
  "#58AB27", # GRÜNE
  "#8C3473", # LINKE
  "#F0001C"  # SPD
)

pal <- colorFactor(palette = party_colours, domain = result1$Winner)

# HTML popup text, one string per row of result1: winning party, voting area
# (Wahlbezirk plus Bezirksname) and the value of the Gültige.Stimmen column.
area_label <- paste0(result1$Wahlbezirk, ", ", result1$Bezirksname)

winner_popup <- paste0(
  "<strong>Winner: </strong>", result1$Winner,
  "<br><strong>Voting area: </strong>", area_label,
  "<br><strong>Number of total votes: </strong>", result1$Gültige.Stimmen
)

# Interactive map of the winning party per voting area.
#
# The same polygon layer is added three times, once per fill opacity, and the
# three layers are exposed as mutually exclusive base groups so the reader can
# switch the transparency from the layers control.

# Fill colours and popups are taken from result1, not from the attribute table
# of berlin_map, so they are matched to the polygons purely by position.
# Stop early instead of drawing a silently mis-coloured map when the two
# objects do not have the same number of rows.
if (nrow(berlin_map@data) != nrow(result1)) {
  stop(
    "berlin_map has ", nrow(berlin_map@data), " polygons but result1 has ",
    nrow(result1), " rows; they must match one-to-one.",
    call. = FALSE
  )
}

# Layer name -> fill opacity, in the order the layers are added to the map.
layer_opacity <- c(
  "Normal transparency" = 0.6,
  "Low transparency" = 0.8,
  "High transparency" = 0.3
)

berlin_leaflet <- berlin_map %>%
  leaflet() %>%
  addTiles()

# One polygon layer per opacity setting; everything else is identical.
for (layer_name in names(layer_opacity)) {
  berlin_leaflet <- addPolygons(
    berlin_leaflet,
    color = "#FFFFFF",
    fillOpacity = layer_opacity[[layer_name]],
    smoothFactor = 0.5,
    weight = 0.5,
    fillColor = ~pal(result1$Winner),
    popup = winner_popup,
    group = layer_name
  )
}

berlin_leaflet <- berlin_leaflet %>%
  addLayersControl(
    position = "topright",
    baseGroups = names(layer_opacity),
    options = layersControlOptions(collapsed = FALSE)
  ) %>%
  addLegend(
    pal = pal,
    values = ~result1$Winner,
    title = "Winning party",
    opacity = 1
  ) %>%
  setView(lng = 13.40, lat = 52.52, zoom = 10)

berlin_leaflet

Click on any voting area to see more information on the winning party, the voting area, and the number of votes in that area.

2 How did Berlin vote?

A classification tree is used to understand the characteristics behind how each voting area voted.

library(rpart)
library(rpart.plot)
library(dplyr)
library(rattle)
library(caTools)
library(partykit)

Before plotting the conditional tree, it is important to select an optimum tree size. To understand how the R-square and the relative error of the tree change with the number of splits, find the cross-validated error and identify the corresponding complexity parameter.

# Model formula: the winning party of an area explained by its
# sociodemographic indicators.
# NOTE: the name `var` masks stats::var() for the rest of the session. It is
# kept because rpart records the call (printed as `formula = var`) and code
# outside this section may refer to it.
var <- Winner ~ EastWest + Hartz4 + Foreigner + Born_Berlin +
  Immigrant_background + Retired + Basic_housing + Average_housing +
  Good_housing + Refugees_percent + Refugees_percent_immigrant_background

# Fix the RNG seed before fitting so the cross-validated errors (xerror) in
# the complexity table are reproducible between runs.
set.seed(16)
tree <- rpart(var, data = df2, method = "class")

# Draw the rsq.rpart() diagnostic plots side by side, then restore the
# previous graphics parameters so later base-graphics plots are not left in a
# 1 x 2 layout.
old_par <- par(mfrow = c(1, 2))
rsq.rpart(tree)
par(old_par)
## 
## Classification tree:
## rpart(formula = var, data = df2, method = "class")
## 
## Variables actually used in tree construction:
## [1] Born_Berlin EastWest    Foreigner   Hartz4      Retired    
## 
## Root node error: 488/653 = 0.74732
## 
## n= 653 
## 
##          CP nsplit rel error  xerror     xstd
## 1  0.270492      0   1.00000 1.03074 0.022027
## 2  0.168033      1   0.72951 0.72951 0.026075
## 3  0.096311      2   0.56148 0.56148 0.025842
## 4  0.065574      3   0.46516 0.48361 0.025156
## 5  0.047131      4   0.39959 0.43238 0.024489
## 6  0.024590      5   0.35246 0.40779 0.024103
## 7  0.014344      6   0.32787 0.40984 0.024137
## 8  0.012295      8   0.29918 0.36066 0.023235
## 9  0.010246     10   0.27459 0.36270 0.023276
## 10 0.010000     11   0.26434 0.36270 0.023276

This shows that pruning the tree to 8 splits gives one of the highest R-square values and the lowest cross-validated error (xerror = 0.36066). Therefore, prune the tree at the complexity parameter that corresponds to the lowest xerror:

# Prune the tree at the complexity parameter (CP) of the row in the CP table
# with the smallest cross-validated error, then convert the pruned tree to a
# party object for plotting.
cp_table <- tree$cptable
best_row <- which.min(cp_table[, "xerror"])
ptree <- prune(tree, cp = cp_table[best_row, "CP"])

rparty.ptree <- as.party(ptree)

# Bar colours for the terminal-node bar plots, one per Winner class.
# NOTE(review): only five colours are listed here, whereas the map palette has
# six (the yellow "#FFED00" is missing). Presumably one party never occurs as
# a Winner level in df2 - verify that the number and order of colours match
# the levels of df2$Winner, otherwise bars may be coloured for the wrong party.
columncol2 <- c(
  "#00ADEF",
  "#000000",
  "#58AB27",
  "#8C3473",
  "#F0001C"
)

# Terminal panel: a bar plot of the class distribution in each leaf, scaled
# to a maximum of 1.0 and drawn with a small font.
winner_panel <- node_barplot(
  rparty.ptree,
  fill = columncol2,
  ylines = 1.2,
  gap = 0.02,
  ymax = 1.0,
  gp = gpar(fontsize = 7)
)

plot(rparty.ptree, terminal_panel = winner_panel)

This shows that if a voting area is in East Berlin and has less than 9.071% retirees, the Winner of that area is Gruene, with a likelihood of more than 80%.

Credit: The voting classification tree and the voting map were inspired by the Berliner Morgenpost, which is also the source of the voting tree data and the sociodemographic data.

The data source for the Berlin vote results can be found here.