This analysis examines the results of the Italian constitutional referendum held on 4 December 2016. The vote concerned a reform of a large part of the Italian constitution, and the choice was between SI (approve the reform) and NO (reject the reform).
The data for the analysis come from the following URL: <http://elezioni.interno.it/opendata.html>
The reform was rejected by a large majority of voters.
The aim of this analysis is to explore the electoral results at the regional and provincial levels, in order to identify the areas where opposition to the reform was strongest and, conversely, those where it was weakest.
# Load the municipality-level results. The first three columns (region,
# province and municipality names) are kept as character via `as.is`.
refdata <- read.csv2("~/OpenData/ScrutiniFI.csv", as.is = 1:3)

# Strip only leading/trailing whitespace from the name columns. Deleting
# every space (gsub(" ", "", ...)) would also collapse multi-word names such
# as "VALLE D'AOSTA" or "FRIULI-VENEZIA GIULIA" and make them inconsistent
# with the names shown in the tables below.
refdata[1:3] <- lapply(refdata[1:3], trimws)

str(refdata)
## 'data.frame': 7998 obs. of 12 variables:
## $ DESCREGIONE : chr "ABRUZZO" "ABRUZZO" "ABRUZZO" "ABRUZZO" ...
## $ DESCPROVINCIA : chr "CHIETI" "CHIETI" "CHIETI" "CHIETI" ...
## $ DESCCOMUNE : chr "ALTINO" "ARCHI" "ARI" "ARIELLI" ...
## $ ELETTORI : int 2288 1785 831 939 8454 686 291 4194 1148 495 ...
## $ ELETTORI_M : int 1101 861 402 453 4121 344 139 2044 563 247 ...
## $ VOTANTI : int 1496 1241 617 612 5860 467 187 2776 760 292 ...
## $ VOTANTI_M : int 775 632 328 304 3006 239 97 1411 397 155 ...
## $ NUMVOTISI : int 533 442 241 194 1952 168 72 739 258 77 ...
## $ NUMVOTINO : int 953 782 366 410 3836 297 112 2015 482 203 ...
## $ NUMVOTIBIANCHI : int 2 3 6 1 45 2 1 7 9 9 ...
## $ NUMVOTINONVALIDI : int 8 14 4 7 27 0 2 15 11 3 ...
## $ NUMVOTICONTESTATI: int 0 0 0 0 0 0 0 0 0 0 ...
# Regional totals: sum every count column (columns 4 to 12 of `refdata`)
# over the municipalities of each region.
mydata <- aggregate(
  x = refdata[, 4:12],
  by = list(Regione = refdata$DESCREGIONE),
  FUN = sum
)

# Valid votes: ballots cast minus blank and invalid ballots.
mydata$VotiTotal <- with(mydata, VOTANTI - (NUMVOTIBIANCHI + NUMVOTINONVALIDI))
# Valid votes as a percentage of ELETTORI (one decimal).
mydata$PercVoti <- with(mydata, round(100 * VotiTotal / ELETTORI, digits = 1))
# SI and NO as a percentage of the valid votes (two decimals).
mydata$PercSI <- with(mydata, round(100 * NUMVOTISI / VotiTotal, digits = 2))
mydata$PercNO <- with(mydata, round(100 * NUMVOTINO / VotiTotal, digits = 2))

# Sort the regions from the highest NO share to the lowest and show the
# key columns.
mydatareg <- mydata[order(mydata$PercNO, decreasing = TRUE), ]
mydatareg[
  ,
  c("Regione", "ELETTORI", "VotiTotal", "PercVoti", "PercSI", "PercNO")
]
## Regione ELETTORI VotiTotal PercVoti PercSI PercNO
## 14 SARDEGNA 1375735 854091 62.1 27.78 72.22
## 15 SICILIA 4031871 2262909 56.1 28.40 71.59
## 4 CAMPANIA 4566905 2667540 58.4 31.48 68.52
## 13 PUGLIA 3280712 2008026 61.2 32.84 67.16
## 3 CALABRIA 1553741 837967 53.9 32.96 67.03
## 2 BASILICATA 467000 290017 62.1 34.11 65.89
## 1 ABRUZZO 1052049 716202 68.1 35.60 64.39
## 7 LAZIO 4402145 3023245 68.7 36.67 63.32
## 20 VENETO 3725400 2835078 76.1 38.04 61.95
## 6 FRIULI-VENEZIA GIULIA 952494 685133 71.9 39.03 60.97
## 11 MOLISE 256600 162434 63.3 39.21 60.78
## 8 LIGURIA 1241469 858552 69.2 39.91 60.08
## 19 VALLE D'AOSTA 99735 70685 70.9 43.25 56.75
## 12 PIEMONTE 3396378 2423617 71.4 43.53 56.47
## 9 LOMBARDIA 7480375 5511544 73.7 44.51 55.49
## 10 MARCHE 1189181 858550 72.2 44.93 55.07
## 18 UMBRIA 675610 492268 72.9 48.82 51.17
## 5 EMILIA-ROMAGNA 3326910 2505525 75.3 50.39 49.61
## 16 TOSCANA 2854129 2105831 73.8 52.51 47.49
## 17 TRENTINO-ALTO ADIGE 792504 566803 71.5 53.87 46.13
# Stacked bar chart of the NO and SI percentages, one bar per region, in the
# row order of `mydatareg`. The row names of the height matrix feed the
# legend.
barplot(
  height = rbind(PercNO = mydatareg$PercNO, PercSI = mydatareg$PercSI),
  names.arg = mydatareg$Regione,
  col = c("skyblue", "darkorange"),
  legend.text = TRUE,
  args.legend = list(x = "topright", cex = 0.9, inset = c(0.1, -0.1)),
  axisnames = TRUE,
  las = 2,
  cex.names = 0.6
)

# Reference line: NO percentage of the valid votes summed over all rows of
# `mydata`.
hl <- round(100 * sum(mydata$NUMVOTINO) / sum(mydata$VotiTotal), digits = 2)
abline(h = hl, col = "red", lwd = 2)
# Provincial totals: sum every count column (columns 4 to 12 of `refdata`)
# over the municipalities of each province.
mydata <- aggregate(
  x = refdata[, 4:12],
  by = list(Provincia = refdata$DESCPROVINCIA),
  FUN = sum
)

# Valid votes: ballots cast minus blank and invalid ballots.
mydata$VotiTotal <- mydata$VOTANTI -
  (mydata$NUMVOTIBIANCHI + mydata$NUMVOTINONVALIDI)
# Valid votes as a percentage of ELETTORI (one decimal).
mydata$PercVoti <- round(100 * mydata$VotiTotal / mydata$ELETTORI, digits = 1)
# SI and NO as a percentage of the valid votes (two decimals).
mydata$PercSI <- round(100 * mydata$NUMVOTISI / mydata$VotiTotal, digits = 2)
mydata$PercNO <- round(100 * mydata$NUMVOTINO / mydata$VotiTotal, digits = 2)

# In how many provinces did NO win?
# Compare the raw vote counts rather than the rounded percentages: two
# percentages that differ only beyond the second decimal round to the same
# value and would wrongly be reported as "NO did not win".
mydata$NOwins <- mydata$NUMVOTINO > mydata$NUMVOTISI
table(mydata$NOwins)
##
## FALSE TRUE
## 12 94
# In how many provinces did the NO percentage reach at least 60%?
# The comparison itself already yields the TRUE/FALSE flag.
mydata$NOg60 <- mydata$PercNO >= 60.0
table(mydata$NOg60)
##
## FALSE TRUE
## 51 55
# Sort the provinces from the highest NO percentage to the lowest.
mydataprv <- mydata[order(mydata$PercNO, decreasing = TRUE), ]

# The ten provinces with the highest NO percentage (key columns only).
areaH <- mydataprv[
  seq_len(10),
  c("Provincia", "ELETTORI", "VotiTotal", "PercVoti", "PercSI", "PercNO")
]
areaH
## Provincia ELETTORI VotiTotal PercVoti PercSI PercNO
## 23 CATANIA 884612 512421 57.9 25.44 74.55
## 62 ORISTANO 136930 84248 61.5 26.02 73.98
## 19 CAGLIARI 659314 418595 63.5 26.22 73.77
## 64 PALERMO 1011728 555583 54.9 27.53 72.46
## 88 SIRACUSA 322752 181724 56.3 28.10 71.90
## 22 CASERTA 713441 419647 58.8 28.31 71.69
## 61 NUORO 176886 104511 59.1 28.75 71.25
## 20 CALTANISSETTA 216007 114994 53.2 28.86 71.14
## 59 NAPOLI 2402977 1348869 56.1 29.62 70.37
## 1 AGRIGENTO 354596 185410 52.3 29.70 70.30
# The ten provinces with the lowest NO percentage, lowest first.
# `mydataprv` is sorted by decreasing PercNO, so these are its last ten rows
# taken in reverse. The positions come from nrow() instead of a hard-coded
# row count, so the selection stays correct if the number of provinces in
# the input changes.
areaL <- mydataprv[rev(tail(seq_len(nrow(mydataprv)), 10)), c(1:2, 11:14)]
areaL
## Provincia ELETTORI VotiTotal PercVoti PercSI PercNO
## 16 BOLZANO 386092 257262 66.6 63.69 36.31
## 34 FIRENZE 748871 577313 77.1 57.71 42.29
## 87 SIENA 204024 154195 75.6 57.18 42.82
## 75 PRATO 180243 132684 73.6 55.72 44.28
## 5 AREZZO 263101 194847 74.1 54.07 45.92
## 57 MODENA 514584 392424 76.3 53.06 46.94
## 77 RAVENNA 294016 222447 75.7 52.82 47.18
## 15 BOLOGNA 758588 580142 76.5 52.30 47.70
## 72 PISTOIA 225615 164471 72.9 52.14 47.86
## 36 FORLI'-CESENA 298785 227359 76.1 51.88 48.12
library(ggplot2)
library(reshape2)
# Ten provinces with the highest and ten with the lowest NO percentage.
# Row positions are derived from nrow() rather than a hard-coded row count.
lowest_rows <- rev(tail(seq_len(nrow(mydataprv)), 10))
area <- mydataprv[c(1:10, lowest_rows), c(1:2, 11:14)]

# Long format: one row per province and outcome (PercSI / PercNO).
area <- melt(data = area, id.vars = names(area)[1],
             measure.vars = names(area)[5:6])

# Keep only the ten provinces with the highest NO percentage, selected by
# name so the filter does not depend on the row layout produced by melt().
ggdf <- area[area$Provincia %in% mydataprv$Provincia[1:10], ]

# Stacked SI/NO bars per province. geom_bar() stacks the two segments, so
# the labels are stacked the same way and centred inside their own segment;
# with the default identity position they are drawn at the raw value and do
# not follow the stacked bars.
gh <- ggplot(ggdf, aes(Provincia, value, fill = variable, label = value)) +
  geom_bar(stat = "identity") +
  geom_text(size = 3, position = position_stack(vjust = 0.5)) +
  labs(y = "Vote percentage", title = "Highest results for NO")
gh
# Reshape the regional table to long format: one row per region and outcome,
# with the SI / NO vote counts in `value`. Note that this overwrites
# `mydatareg` with the reshaped data.
mydatareg <- melt(
  data = mydatareg,
  id.vars = "Regione",
  measure.vars = c("NUMVOTISI", "NUMVOTINO")
)

# Pie chart of the SI and NO votes: a single stacked bar drawn in polar
# coordinates.
p <- ggplot(mydatareg, aes(x = factor(1), y = value, fill = variable)) +
  geom_bar(stat = "identity", width = 1) +
  xlab("") +
  coord_polar(theta = "y")
p