I have already analyzed the results of the Italian Referendum (http://rpubs.com/GValentini/245511) held in December 2016.
In the following work my purpose is to use the R package ggplot2 and its beautiful and impressive map tools to display the results at provincial level.
The data source for the analysis is the following url: http://elezioni.interno.it/opendata.html.
refdata <- read.csv2("~/OpenData/ScrutiniFI.csv",
as.is = c(1:3))
for (j in 1:3) {
refdata[, j] <- gsub(" ", "", refdata[, j])
refdata[, j] <- gsub(" $", "", refdata[, j])
}
str(refdata)
## 'data.frame': 7998 obs. of 12 variables:
## $ DESCREGIONE : chr "ABRUZZO" "ABRUZZO" "ABRUZZO" "ABRUZZO" ...
## $ DESCPROVINCIA : chr "CHIETI" "CHIETI" "CHIETI" "CHIETI" ...
## $ DESCCOMUNE : chr "ALTINO" "ARCHI" "ARI" "ARIELLI" ...
## $ ELETTORI : int 2288 1785 831 939 8454 686 291 4194 1148 495 ...
## $ ELETTORI_M : int 1101 861 402 453 4121 344 139 2044 563 247 ...
## $ VOTANTI : int 1496 1241 617 612 5860 467 187 2776 760 292 ...
## $ VOTANTI_M : int 775 632 328 304 3006 239 97 1411 397 155 ...
## $ NUMVOTISI : int 533 442 241 194 1952 168 72 739 258 77 ...
## $ NUMVOTINO : int 953 782 366 410 3836 297 112 2015 482 203 ...
## $ NUMVOTIBIANCHI : int 2 3 6 1 45 2 1 7 9 9 ...
## $ NUMVOTINONVALIDI : int 8 14 4 7 27 0 2 15 11 3 ...
## $ NUMVOTICONTESTATI: int 0 0 0 0 0 0 0 0 0 0 ...
# summarize the data
seldf <- refdata[refdata$DESCREGIONE %in% c("ABRUZZO", "BASILICATA", "CAMPANIA", "FRIULI-VENEZIA GIULIA", "LAZIO", "MOLISE", "SICILIA", "TRENTINO-ALTO ADIGE", "VENETO"), ]
mydata <- aggregate(x = seldf[, 4:12],
by = list(Zona = seldf$DESCREGIONE, Provincia = seldf$DESCPROVINCIA), FUN = sum)
mydata$VotiTotal <- mydata$VOTANTI - (mydata$NUMVOTIBIANCHI + mydata$NUMVOTINONVALIDI)
mydata$PercVoti <- round(100*mydata$VotiTotal/mydata$ELETTORI, digits = 1)
mydata$PercSI <- round(100 * mydata[, 7]/mydata$VotiTotal, digits = 2)
mydata$PercNO <- round(100 * mydata[, 8]/mydata$VotiTotal, digits = 2)
We select the map in ggplot2 R package:
library(ggplot2)
library(plyr)
mymap <- map_data("italy")
# change the name of the Province of Bolzano
mymap$region <- gsub("-Bozen", "", mymap$region)
mymap$region <- toupper(mymap$region)
mydata$region <- mydata$Provincia
# in order to group the map data by field Zona
mymap <- join(x = mymap, y = mydata,
by = "region", type = "inner")
The first plot displays the results in 3 Regions of Central Italy.
mydf <- mydata[mydata$Zona %in% c("ABRUZZO", "LAZIO", "MOLISE"), ]
tmp_map <- mymap[mymap$Zona %in% c("ABRUZZO", "LAZIO", "MOLISE"), ]
qnt <- seq(60.0, 70.0, 2.0)
tmp_map$value <- cut(tmp_map$PercNO, breaks = qnt, include.lowest = TRUE)
sort(unique(tmp_map$value))
## [1] [60,62] (62,64] (64,66] (68,70]
## Levels: [60,62] (62,64] (64,66] (66,68] (68,70]
# Table of results
mydf[, c(1:2, 5, 14:15)]
## Zona Provincia VOTANTI PercSI PercNO
## 7 MOLISE CAMPOBASSO 119473 39.88 60.11
## 10 ABRUZZO CHIETI 215862 35.59 64.40
## 12 LAZIO FROSINONE 267736 31.91 68.09
## 14 MOLISE ISERNIA 44565 37.41 62.59
## 15 ABRUZZO L'AQUILA 162944 35.05 64.95
## 16 LAZIO LATINA 299092 31.33 68.67
## 22 ABRUZZO PESCARA 176565 35.20 64.80
## 26 LAZIO RIETI 84527 37.39 62.61
## 27 LAZIO ROMA 2215349 38.05 61.94
## 31 ABRUZZO TERAMO 167559 36.58 63.42
## 40 LAZIO VITERBO 177969 35.30 64.68
The plot shows the level of support for NO by Province in the selected area.
m <- ggplot(data = tmp_map, aes(x = long, y = lat))
m <- m +
geom_polygon(aes(group = group, fill = value), colour = "red")
m <- m + scale_fill_brewer(palette = "YlGnBu")
m <- m + coord_fixed(ylim = c(41.2, 42.9), ratio = 1.25)
m <- m + theme_bw()
m <- m + labs(title = "Abruzzo - Lazio - Molise", x = "", y = "", fill = "% of NO")
m
The second plot displays the results in 2 Regions of Southern Italy.
mydf <- mydata[mydata$Zona %in% c("BASILICATA", "CAMPANIA"), ]
tmp_map <- mymap[mymap$Zona %in% c("BASILICATA", "CAMPANIA"), ]
qnt <- seq(60.0, 72.0, 3.0)
tmp_map$value <- cut(tmp_map$PercNO, breaks = qnt, include.lowest = TRUE)
sort(unique(tmp_map$value))
## [1] [60,63] (63,66] (66,69] (69,72]
## Levels: [60,63] (63,66] (66,69] (69,72]
# Table of results
mydf[, c(1:2, 5, 14:15)]
## Zona Provincia VOTANTI PercSI PercNO
## 2 CAMPANIA AVELLINO 217132 38.70 61.30
## 4 CAMPANIA BENEVENTO 144815 32.97 67.02
## 8 CAMPANIA CASERTA 423054 28.31 71.69
## 17 BASILICATA MATERA 99543 31.84 68.15
## 19 CAMPANIA NAPOLI 1358562 29.62 70.37
## 24 BASILICATA POTENZA 194003 35.27 64.72
## 29 CAMPANIA SALERNO 545507 35.31 64.69
The plot shows the level of support for NO by Province in the selected area.
m <- ggplot(data = tmp_map, aes(x = long, y = lat))
m <- m +
geom_polygon(aes(group = group, fill = value), colour = "red")
m <- m + scale_fill_brewer(palette = "YlGnBu")
m <- m + coord_fixed(ratio = 1.25)
m <- m + theme_bw()
m <- m + labs(title = "Basilicata - Campania", x = "", y = "", fill = "% of NO")
m
The third plot displays the results in the Region of Sicily.
mydf <- mydata[mydata$Zona %in% c("SICILIA"), ]
tmp_map <- mymap[mymap$Zona %in% c("SICILIA"), ]
qnt <- seq(60.0, 75.0, 2.5)
tmp_map$value <- cut(tmp_map$PercNO, breaks = qnt, include.lowest = TRUE)
sort(unique(tmp_map$value))
## [1] (65,67.5] (67.5,70] (70,72.5] (72.5,75]
## Levels: [60,62.5] (62.5,65] (65,67.5] (67.5,70] (70,72.5] (72.5,75]
# Table of results
mydf[, c(1:2, 5, 14:15)]
## Zona Provincia VOTANTI PercSI PercNO
## 1 SICILIA AGRIGENTO 187267 29.70 70.30
## 6 SICILIA CALTANISSETTA 115958 28.86 71.14
## 9 SICILIA CATANIA 516771 25.44 74.55
## 11 SICILIA ENNA 77636 32.65 67.35
## 18 SICILIA MESSINA 301601 30.45 69.55
## 21 SICILIA PALERMO 560856 27.53 72.46
## 25 SICILIA RAGUSA 146037 31.74 68.26
## 30 SICILIA SIRACUSA 183045 28.10 71.90
## 32 SICILIA TRAPANI 195083 30.21 69.79
The plot shows the level of support for NO by Province in the selected area.
m <- ggplot(data = tmp_map, aes(x = long, y = lat))
m <- m +
geom_polygon(aes(group = group, fill = value), colour = "red")
m <- m + scale_fill_brewer(palette = "YlGnBu")
m <- m + coord_fixed(xlim = c(12.3, 15.7), ylim = c(36.5, 38.5), ratio = 1.25)
m <- m + theme_bw()
m <- m + labs(title = "Sicily", x = "", y = "", fill = "% of NO")
m
The fourth plot displays the results in 3 Regions of North-Eastern Italy.
mydf <- mydata[mydata$Zona %in% c("FRIULI-VENEZIA GIULIA", "TRENTINO-ALTO ADIGE", "VENETO"), ]
tmp_map <- mymap[mymap$Zona %in% c("FRIULI-VENEZIA GIULIA", "TRENTINO-ALTO ADIGE", "VENETO"), ]
qnt <- seq(35.0, 65.0, 5.0)
tmp_map$value <- cut(tmp_map$PercNO, breaks = qnt, include.lowest = TRUE)
sort(unique(tmp_map$value))
## [1] [35,40] (50,55] (55,60] (60,65]
## Levels: [35,40] (40,45] (45,50] (50,55] (55,60] (60,65]
# Table of results
mydf[, c(1:2, 5, 14:15)]
## Zona Provincia VOTANTI PercSI PercNO
## 3 VENETO BELLUNO 118652 38.98 61.02
## 5 TRENTINO-ALTO ADIGE BOLZANO 260273 63.69 36.31
## 13 FRIULI-VENEZIA GIULIA GORIZIA 79099 39.46 60.52
## 20 VENETO PADOVA 561369 38.01 61.99
## 23 FRIULI-VENEZIA GIULIA PORDENONE 175462 40.02 59.98
## 28 VENETO ROVIGO 142590 39.67 60.33
## 33 TRENTINO-ALTO ADIGE TRENTO 312213 45.70 54.29
## 34 VENETO TREVISO 509011 37.26 62.74
## 35 FRIULI-VENEZIA GIULIA TRIESTE 126293 36.63 63.37
## 36 FRIULI-VENEZIA GIULIA UDINE 309863 39.33 60.67
## 37 VENETO VENEZIA 489809 38.28 61.72
## 38 VENETO VERONA 524037 39.11 60.89
## 39 VENETO VICENZA 510581 36.87 63.12
The plot shows the level of support for NO by Province in the selected area.
m <- ggplot(data = tmp_map, aes(x = long, y = lat))
m <- m +
geom_polygon(aes(group = group, fill = value), colour = "red")
m <- m + scale_fill_brewer(palette = "YlGnBu")
m <- m + coord_fixed(ratio = 1.25)
m <- m + theme_bw()
m <- m + labs(title = "Friuli-Venezia Giulia - Trentino-Alto Adige - Veneto", x = "", y = "", fill = "% of NO")
m