Libraries used. rgugik provides necessary data
library(rgugik)
library(sf)
library(dplyr)
library(purrr)
library(leaflet)
library(readxl)
library(knitr)
library(htmltools)
library(factoextra)
The data on population and cities comes from the Polish Statistical Office (GUS) spreadsheet, sheets 2 & 9.
The relevant data is extracted and formatted for further use.
# Assumes `wojs` already holds the raw population sheet (its loading is not
# shown in this section). Keep rows 6-21 and columns 2 and 5 only.
wojs <- wojs[6:21, c(2, 5)]
names(wojs) <- c("voivodeship", "population")
# Coerce the population column to integer.
wojs[["population"]] <- as.integer(wojs[["population"]])
# Normalise the names: strip surrounding whitespace, then lower-case.
wojs[["voivodeship"]] <- tolower(trimws(wojs[["voivodeship"]]))
kable(wojs)
| voivodeship | population |
|---|---|
| dolnośląskie | 2897737 |
| kujawsko-pomorskie | 2017720 |
| lubelskie | 2038299 |
| lubuskie | 985487 |
| łódzkie | 2394946 |
| małopolskie | 3430370 |
| mazowieckie | 5512794 |
| opolskie | 948583 |
| podkarpackie | 2085932 |
| podlaskie | 1148720 |
| pomorskie | 2358726 |
| śląskie | 4375947 |
| świętokrzyskie | 1187693 |
| warmińsko-mazurskie | 1374699 |
| wielkopolskie | 3500030 |
| zachodniopomorskie | 1650021 |
Extracting data on the number of cities (gmina miejska) in each voivodeship
# Assumes `voi_cities` already holds the raw cities sheet (its loading is not
# shown in this section). Keep every second row from 10 to 40 and columns 1, 3.
voi_cities <- voi_cities[seq(10L, 40L, by = 2L), c(1, 3)]
names(voi_cities) <- c("voivodeship", "no_cities")
# Coerce the city count to integer.
voi_cities[["no_cities"]] <- as.integer(voi_cities[["no_cities"]])
kable(voi_cities)
| voivodeship | no_cities |
|---|---|
| Dolnośląskie | 35 |
| Kujawsko-pomorskie | 17 |
| Lubelskie | 20 |
| Lubuskie | 9 |
| Łódzkie | 18 |
| Małopolskie | 14 |
| Mazowieckie | 35 |
| Opolskie | 3 |
| Podkarpackie | 16 |
| Podlaskie | 13 |
| Pomorskie | 22 |
| Śląskie | 49 |
| Świętokrzyskie | 5 |
| Warmińsko-mazurskie | 16 |
| Wielkopolskie | 19 |
| Zachodniopomorskie | 11 |
The polygon data (border coordinates) comes from GUGIK and is downloaded via
the rgugik package.
Borders of the voivodeships to draw polygons
# Fetch the border geometry for each voivodeship name in `wojs`, then
# reproject it to EPSG:4326, the CRS used for the map below.
voi_geoms <- wojs$voivodeship %>%
  borders_get() %>%
  st_transform(crs = "EPSG:4326")
The data is prepared and the number of clusters is set to 5, guided by the within-cluster sum of squares (elbow) plot
# Combine the three tables column-wise. This relies on all of them having
# their rows in the same voivodeship order; nothing is matched by key.
voi_tot <- cbind.data.frame(voi_cities, voi_geoms, wojs)

# Sort both the combined table and the geometries by ascending population,
# using one shared ordering so the two stay row-aligned.
by_population <- order(voi_tot$population)
voi_geoms <- voi_geoms[by_population, ]
voi_tot <- voi_tot[by_population, ]

# Elbow plot (within-cluster sum of squares) used to pick the cluster count.
fviz_nbclust(as.data.frame(voi_tot$population), kmeans, method = "wss")

# NOTE(review): kmeans() starts from randomly chosen centres and no seed is
# set, so the cluster numbering (and possibly the partition) can differ
# between runs.
no_clusters <- 5
km <- kmeans(voi_tot$population, centers = no_clusters)
voi_tot[["cluster"]] <- km[["cluster"]]
km
## K-means clustering with 5 clusters of sizes 5, 3, 2, 2, 4
##
## Cluster means:
## [,1]
## 1 2179125
## 2 3276046
## 3 4944371
## 4 1512360
## 5 1067621
##
## Clustering vector:
## [1] 5 5 5 5 4 4 1 1 1 1 1 2 2 2 3 3
##
## Within cluster sum of squares by cluster:
## [1] 133403694791 217102428713 646210550705 37901101842 41910372385
## (between_SS / total_SS = 95.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Render the combined table; note it carries two `voivodeship` columns, one
# from each source table.
knitr::kable(voi_tot)
| | voivodeship | no_cities | TERYT | geom | voivodeship | population | cluster |
|---|---|---|---|---|---|---|---|
| 8 | Opolskie | 3 | 16 | MULTIPOLYGON (((18.06438 50… | opolskie | 948583 | 5 |
| 4 | Lubuskie | 9 | 08 | MULTIPOLYGON (((15.38223 51… | lubuskie | 985487 | 5 |
| 10 | Podlaskie | 13 | 20 | MULTIPOLYGON (((21.98108 52… | podlaskie | 1148720 | 5 |
| 13 | Świętokrzyskie | 5 | 26 | MULTIPOLYGON (((19.7043 50…. | świętokrzyskie | 1187693 | 5 |
| 14 | Warmińsko-mazurskie | 16 | 28 | MULTIPOLYGON (((21.55932 54… | warmińsko-mazurskie | 1374699 | 4 |
| 16 | Zachodniopomorskie | 11 | 32 | MULTIPOLYGON (((14.81157 54… | zachodniopomorskie | 1650021 | 4 |
| 2 | Kujawsko-pomorskie | 17 | 04 | MULTIPOLYGON (((17.6567 53…. | kujawsko-pomorskie | 2017720 | 1 |
| 3 | Lubelskie | 20 | 06 | MULTIPOLYGON (((22.19999 50… | lubelskie | 2038299 | 1 |
| 9 | Podkarpackie | 16 | 18 | MULTIPOLYGON (((22.19999 50… | podkarpackie | 2085932 | 1 |
| 11 | Pomorskie | 22 | 22 | MULTIPOLYGON (((16.71258 54… | pomorskie | 2358726 | 1 |
| 5 | Łódzkie | 18 | 10 | MULTIPOLYGON (((20.5276 51…. | łódzkie | 2394946 | 1 |
| 1 | Dolnośląskie | 35 | 02 | MULTIPOLYGON (((15.38223 51… | dolnośląskie | 2897737 | 2 |
| 6 | Małopolskie | 14 | 12 | MULTIPOLYGON (((21.17981 50… | małopolskie | 3430370 | 2 |
| 15 | Wielkopolskie | 19 | 30 | MULTIPOLYGON (((16.71447 53… | wielkopolskie | 3500030 | 2 |
| 12 | Śląskie | 49 | 24 | MULTIPOLYGON (((18.06438 50… | śląskie | 4375947 | 3 |
| 7 | Mazowieckie | 35 | 14 | MULTIPOLYGON (((20.5276 51…. | mazowieckie | 5512794 | 3 |
Colours for clusters
# Palette function mapping each cluster id to a colour from the "Set1" palette.
colours <- colorFactor(palette = "Set1", domain = voi_tot$cluster)
Labels for polygons
# Hover labels for the polygons, one per voivodeship: the name in bold, the
# population with a space as thousands separator, and the number of cities.
# The format string must not end with a closing `</sup>` tag: there is no
# matching opening tag, so it would make the label HTML malformed.
# `trim = TRUE` stops format() from left-padding the populations to a common
# width, which would otherwise put leading spaces into the label text.
labels <- sprintf(
  "<strong>%s</strong><br/>%s people<br/>%d cities",
  voi_tot$voivodeship,
  format(voi_tot$population, big.mark = " ", trim = TRUE),
  voi_tot$no_cities
) %>%
  lapply(HTML) # mark each string as HTML so leaflet renders the tags
# Interactive map: OpenStreetMap base tiles with one polygon per voivodeship,
# filled by k-means cluster. `voi_geoms` and `voi_tot` are expected to be in
# the same row order, since the colours and labels are taken from `voi_tot`
# by position.
leaflet(data = voi_geoms) %>%
  addProviderTiles(provider = "OpenStreetMap.Mapnik") %>%
  addPolygons(
    fillColor = ~ colours(voi_tot$cluster),
    fillOpacity = 0.75,
    label = labels,
    labelOptions = labelOptions(
      style = list(`font-weight` = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto"
    ),
    # Emphasise the polygon under the cursor.
    highlightOptions = highlightOptions(
      weight = 5,
      fillOpacity = 1,
      bringToFront = TRUE
    )
  )