Data preparation

Libraries used. The rgugik package provides the voivodeship border data.

library(rgugik)
library(sf)
library(dplyr)
library(purrr)
library(leaflet)
library(readxl)
library(knitr)
library(htmltools)
library(factoextra)

Gathering data

The data on population and cities comes from a spreadsheet published by the Polish Statistical Office (GUS); sheets 2 and 9 are used.

Population

The relevant data is extracted and formatted for further use.

# Keep the 16 rows (6-21) and the two columns (2 and 5) that hold the
# voivodeship name and its population.
wojs <- wojs[6:21, c(2, 5)]
names(wojs) <- c("voivodeship", "population")
# Normalise the names: strip surrounding whitespace, then lower-case them.
wojs$voivodeship <- tolower(trimws(wojs$voivodeship))
# Population is converted to integer for the later numeric steps.
wojs$population <- as.integer(wojs$population)
kable(wojs)
voivodeship population
dolnośląskie 2897737
kujawsko-pomorskie 2017720
lubelskie 2038299
lubuskie 985487
łódzkie 2394946
małopolskie 3430370
mazowieckie 5512794
opolskie 948583
podkarpackie 2085932
podlaskie 1148720
pomorskie 2358726
śląskie 4375947
świętokrzyskie 1187693
warmińsko-mazurskie 1374699
wielkopolskie 3500030
zachodniopomorskie 1650021

Cities

Extracting data on the number of cities (gmina miejska) in each voivodeship

# Take every second row from 10 to 40 (16 rows) and columns 1 and 3, which
# are then named as the voivodeship and its number of cities.
voi_cities <- voi_cities[seq.int(10L, 40L, by = 2L), c(1, 3)]
names(voi_cities) <- c("voivodeship", "no_cities")
voi_cities[["no_cities"]] <- as.integer(voi_cities[["no_cities"]])
kable(voi_cities)
voivodeship no_cities
Dolnośląskie 35
Kujawsko-pomorskie 17
Lubelskie 20
Lubuskie 9
Łódzkie 18
Małopolskie 14
Mazowieckie 35
Opolskie 3
Podkarpackie 16
Podlaskie 13
Pomorskie 22
Śląskie 49
Świętokrzyskie 5
Warmińsko-mazurskie 16
Wielkopolskie 19
Zachodniopomorskie 11

Polygons

The data on polygons comes from GUGIK - coordinates of borders - via rgugik

Borders of the voivodeships to draw polygons

# Fetch the border geometries for the listed voivodeships and reproject them
# to EPSG:4326 in one step.
voi_geoms <- st_transform(
  borders_get(wojs$voivodeship),
  crs = "EPSG:4326"
)

Clustering

Data is prepared and the number of clusters is determined as 5

# Combine city counts, geometries and population into one table.
# The bind is positional (no key is matched), so this assumes all three
# objects list the voivodeships in the same row order -- TODO confirm.
# Note that both voi_cities and wojs contribute a `voivodeship` column.
voi_tot <- cbind.data.frame(voi_cities, voi_geoms, wojs)
# Sort both objects by ascending population. voi_geoms has to be reordered
# first, while voi_tot$population is still in the original row order.
voi_geoms <- voi_geoms[order(voi_tot$population), ]
voi_tot <- voi_tot[order(voi_tot$population), ]
# Plot the within-cluster sum of squares ("wss") for k-means on population,
# used to choose the number of clusters.
fviz_nbclust(as.data.frame(voi_tot$population), kmeans, method = "wss")

# k-means starts from randomly chosen centres, so the RNG seed is fixed to
# keep the partition and the cluster numbering (and therefore the map
# colours) identical between runs.
set.seed(42)
no_clusters <- 5
# Cluster the voivodeships on population alone.
km <- kmeans(voi_tot$population, centers = no_clusters)
# Keep each voivodeship's cluster id next to its other attributes.
voi_tot$cluster <- km$cluster
km
## K-means clustering with 5 clusters of sizes 5, 3, 2, 2, 4
## 
## Cluster means:
##      [,1]
## 1 2179125
## 2 3276046
## 3 4944371
## 4 1512360
## 5 1067621
## 
## Clustering vector:
##  [1] 5 5 5 5 4 4 1 1 1 1 1 2 2 2 3 3
## 
## Within cluster sum of squares by cluster:
## [1] 133403694791 217102428713 646210550705  37901101842  41910372385
##  (between_SS / total_SS =  95.7 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Show the combined table, now including the cluster assignment.
knitr::kable(voi_tot)
voivodeship no_cities TERYT geom voivodeship population cluster
8 Opolskie 3 16 MULTIPOLYGON (((18.06438 50… opolskie 948583 5
4 Lubuskie 9 08 MULTIPOLYGON (((15.38223 51… lubuskie 985487 5
10 Podlaskie 13 20 MULTIPOLYGON (((21.98108 52… podlaskie 1148720 5
13 Świętokrzyskie 5 26 MULTIPOLYGON (((19.7043 50…. świętokrzyskie 1187693 5
14 Warmińsko-mazurskie 16 28 MULTIPOLYGON (((21.55932 54… warmińsko-mazurskie 1374699 4
16 Zachodniopomorskie 11 32 MULTIPOLYGON (((14.81157 54… zachodniopomorskie 1650021 4
2 Kujawsko-pomorskie 17 04 MULTIPOLYGON (((17.6567 53…. kujawsko-pomorskie 2017720 1
3 Lubelskie 20 06 MULTIPOLYGON (((22.19999 50… lubelskie 2038299 1
9 Podkarpackie 16 18 MULTIPOLYGON (((22.19999 50… podkarpackie 2085932 1
11 Pomorskie 22 22 MULTIPOLYGON (((16.71258 54… pomorskie 2358726 1
5 Łódzkie 18 10 MULTIPOLYGON (((20.5276 51…. łódzkie 2394946 1
1 Dolnośląskie 35 02 MULTIPOLYGON (((15.38223 51… dolnośląskie 2897737 2
6 Małopolskie 14 12 MULTIPOLYGON (((21.17981 50… małopolskie 3430370 2
15 Wielkopolskie 19 30 MULTIPOLYGON (((16.71447 53… wielkopolskie 3500030 2
12 Śląskie 49 24 MULTIPOLYGON (((18.06438 50… śląskie 4375947 3
7 Mazowieckie 35 14 MULTIPOLYGON (((20.5276 51…. mazowieckie 5512794 3

Map

Colours for clusters

# Palette function mapping each cluster id to a colour from "Set1".
colours <- colorFactor(palette = "Set1", domain = voi_tot$cluster)

Labels for polygons

# Build one HTML label per voivodeship: bold name, population with a space as
# thousands separator, and the number of cities.
# The stray closing </sup> tag (it had no matching opening tag) is removed,
# and trim = TRUE stops format() from left-padding the numbers to a common
# width.
# NOTE: voi_tot carries two `voivodeship` columns; `$` returns the first one.
labels <- sprintf(
  "<strong>%s</strong><br/>%s people<br/>%d cities",
  voi_tot$voivodeship,
  format(voi_tot$population, big.mark = " ", trim = TRUE),
  voi_tot$no_cities
) %>%
  lapply(HTML)

The map

# Interactive map: one polygon per voivodeship, filled by population cluster.
# voi_geoms and voi_tot were sorted with the same ordering, so row i of each
# refers to the same voivodeship.
leaflet(voi_geoms) %>%
  addProviderTiles("OpenStreetMap.Mapnik") %>%
  addPolygons(
    fillColor = ~colours(voi_tot$cluster),
    fillOpacity = 0.75,
    # Emphasise the highlighted polygon
    highlightOptions = highlightOptions(
      weight = 5,
      fillOpacity = 1,
      bringToFront = TRUE
    ),
    label = labels,
    # Label appearance
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto"
    )
  )