In this notebook we will create an aligned UMAP for Swedish parliamentary election results on a municipality level.

First, load the necessary packages:

pacman::p_load(tidyverse, 
               janitor, # Useful tools, functions used: clean_names and remove_constant
               hablar, # Nice complement to janitor, function used: retype
               pxweb, # Interface for the API Statistics Sweden uses
               reticulate, # Python bridge
               tictoc, # Stop watch
               plotly, # Interactive plots
               gganimate) # Animated plots

If UMAP 0.5 (development version) isn’t installed you can install it with this code:

py_install("git+https://github.com/lmcinnes/umap.git@43181dec02e3932da9f7a0ee9c12abee5265d7b0", pip = TRUE)

Then we get the data from Statistics Sweden, and clean it up:

get_pxweb_data(url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/ME/ME0104/ME0104C/ME0104T3",
               dims = list(Region = c('0114', '0115', '0117', '0120', '0123', '0125', '0126', '0127', '0128', '0136', '0138', '0139', '0140', '0160', '0162', '0163', '0180', '0181', '0182', '0183', '0184', '0186', '0187', '0188', '0191', '0192', '0305', '0319', '0330', '0331', '0360', '0380', '0381', '0382', '0428', '0461', '0480', '0481', '0482', '0483', '0484', '0486', '0488', '0509', '0512', '0513', '0560', '0561', '0562', '0563', '0580', '0581', '0582', '0583', '0584', '0586', '0604', '0617', '0642', '0643', '0662', '0665', '0680', '0682', '0683', '0684', '0685', '0686', '0687', '0760', '0761', '0763', '0764', '0765', '0767', '0780', '0781', '0821', '0834', '0840', '0860', '0861', '0862', '0880', '0881', '0882', '0883', '0884', '0885', '0980', '1060', '1080', '1081', '1082', '1083', '1214', '1229', '1230', '1231', '1233', '1256', '1257', '1260', '1261', '1262', '1263', '1264', '1265', '1266', '1267', '1270', '1272', '1273', '1275', '1276', '1277', '1278', '1280', '1281', '1282', '1283', '1284', '1285', '1286', '1287', '1290', '1291', '1292', '1293', '1315', '1380', '1381', '1382', '1383', '1384', '1401', '1402', '1407', '1415', '1419', '1421', '1427', '1430', '1435', '1438', '1439', '1440', '1441', '1442', '1443', '1444', '1445', '1446', '1447', '1452', '1460', '1461', '1462', '1463', '1465', '1466', '1470', '1471', '1472', '1473', '1480', '1481', '1482', '1484', '1485', '1486', '1487', '1488', '1489', '1490', '1491', '1492', '1493', '1494', '1495', '1496', '1497', '1498', '1499', '1715', '1730', '1737', '1760', '1761', '1762', '1763', '1764', '1765', '1766', '1780', '1781', '1782', '1783', '1784', '1785', '1814', '1860', '1861', '1862', '1863', '1864', '1880', '1881', '1882', '1883', '1884', '1885', '1904', '1907', '1960', '1961', '1962', '1980', '1981', '1982', '1983', '1984', '2021', '2023', '2026', '2029', '2031', '2034', '2039', '2061', '2062', '2080', '2081', '2082', '2083', '2084', '2085', '2101', '2104', '2121', '2132', '2161', '2180', '2181', '2182', '2183', '2184', '2260', '2262', '2280', '2281', '2282', '2283', '2284', '2303', '2305', '2309', '2313', '2321', '2326', '2361', '2380', '2401', '2403', '2404', '2409', '2417', '2418', '2421', '2422', '2425', '2460', '2462', '2463', '2480', '2481', '2482', '2505', '2506', '2510', '2513', '2514', '2518', '2521', '2523', '2560', '2580', '2581', '2582', '2583', '2584'),
                           Partimm = c('M', 'C', 'FP', 'KD', 'MP', 'S', 'V', 'SD', 'ÖVRIGA'),
                           ContentsCode = c('ME0104B7'),
                           Tid = c('*')),
               clean = TRUE) %>% 
  as_tibble() %>% 
  clean_names() %>% 
  retype() %>% 
  remove_constant() %>% 
  drop_na(values) %>%  
  mutate(values = values/100) %>% 
  spread(parti_mm, values, fill = 0) %>% 
  clean_names() %>% 
  separate(region, c("kommunid","kommun"), sep = " ", extra = "merge") -> 
  valresultat_kommuner

Some words in Swedish that are good to know:

kommun(er) - municipality(ies)
tid - time (used for election year)
valresultat - election result

Then we create the slices and the dictionary:

valresultat_kommuner %>% 
  arrange(tid, kommunid) %>% 
  select(-kommunid, -kommun) %>% 
  nest(data=-tid) %>% 
  mutate(data = map(data, ~as.matrix(.x) %>% r_to_py())) %>% 
  select(-tid) %>% 
  as.list %>% 
  .$data %>% 
  as.array() -> 
  kommun_slices

valresultat_kommuner %>% arrange(tid, kommunid) %>% group_by(tid) %>% 
  mutate(row = row_number()) %>% 
  group_by(kommunid) %>% 
  mutate(id_lead = lead(row)) %>% 
  ungroup() %>% 
  drop_na(id_lead) %>% select(tid, row, id_lead) %>% 
  mutate(tid = tid %>% factor %>% as.numeric) %>% 
  retype %>% 
  nest(data = c(-tid)) %>% 
  mutate(data = map(data, ~py_dict(.x$row, .x$id_lead))) %>% as.list() %>%  
  .$data -> 
  kommuner_dict

Then we import the aligned UMAP function from the UMAP package, in the Python session:

py_run_string("import umap.aligned_umap")

Then we can run the alignment UMAP. The reason for not using a converted function is due to the conversion of either the dictionary or the slices that doesn’t work properly. I haven’t figured out a way to solve that.

tic()
py_run_string("aligned_mapper = umap.aligned_umap.AlignedUMAP().fit(r.kommun_slices, relations=r.kommuner_dict)")
toc()

Then pull the embeddings, convert it into a Python list, and then into an R list:

py_run_string("embeddings = list(aligned_mapper.embeddings_)")
py$embeddings -> embeddings

Then we can make some plots, if we first combine the embeddings with the original data:

valresultat_kommuner %>% 
  arrange(tid, kommunid) %>% 
  nest(data=-tid) %>% 
  bind_cols(embeddings %>% enframe()) %>% 
  mutate(value = map(value, ~as.data.frame(.x))) %>% 
  unnest(c(data, value)) %>% 
  select(-name) -> 
  combined_data

Plots

combined_data %>% 
  ggplot(aes(V1, V2))+
  geom_point()+
  labs(x=NULL, y=NULL)+
  coord_equal()+
  facet_wrap(~tid)+
  theme(axis.text = element_blank(), axis.ticks = element_blank(), 
        panel.grid.major = element_blank(), panel.grid.minor = element_blank())

Then we can create an interactive plot with a time slider. However, the set of municipalities changes over time, which creates “structural breaks” when the set of municipalies changes. This is a bug in Plotly. Compare e.g. 1998 with 2002.

combined_data %>% 
  rename(`Election Year` = tid) %>% 
  ggplot(aes(V1, V2))+
  geom_point(aes(frame=`Election Year`, 
                 label = kommun))+
  labs(x=NULL, y=NULL)+
  coord_equal() +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), 
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) -> 
  plotobj

ggplotly(plotobj, 
         tooltip = "label")

We can create an animated plot to compare with:

combined_data %>% 
  ggplot(aes(V1, V2, group=kommun))+
    theme(axis.text = element_blank(), axis.ticks = element_blank(), 
        panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  geom_point()+
  transition_time(tid)+
  labs(title = 'Year: {frame_time}', 
       x=NULL, 
       y=NULL) -> animation_plot

anim_save("animation.gif", animation_plot)

Here we clearly see that the “structural breaks” we saw before was an artifact from Plotly.

Aligned UMAP of Swedish Election Results

Plots