Synopsis

The analysis refers to the electoral results for the Italian Referendum which was held on the 4th of December 2016. The vote was about the reform of a large part of the Italian constitution, and the choice was between SI (to approve the reform) and NO (to reject the reform).

The data source for the analysis is the following url: http://elezioni.interno.it/opendata.html.

The reform was rejected by a large majority of voters.

The aim of my analysis is to explore the electoral results at regional and provincial level, in order to find out in which areas opposition to the reform was strongest and, conversely, where it was weakest.

Step 1 - Read the data

# Load the municipality-level results, keeping the first three columns
# (region, province and municipality names) as plain character vectors.
refdata <- read.csv2("~/OpenData/ScrutiniFI.csv", as.is = 1:3)
# Strip padding from the name columns. trimws() removes leading and trailing
# whitespace only, so blanks that separate the words of a name are preserved
# (removing blanks in pairs could glue words together or leave a stray
# leading blank).
refdata[1:3] <- lapply(refdata[1:3], trimws)
str(refdata)
## 'data.frame':    7998 obs. of  12 variables:
##  $ DESCREGIONE      : chr  "ABRUZZO" "ABRUZZO" "ABRUZZO" "ABRUZZO" ...
##  $ DESCPROVINCIA    : chr  "CHIETI" "CHIETI" "CHIETI" "CHIETI" ...
##  $ DESCCOMUNE       : chr  "ALTINO" "ARCHI" "ARI" "ARIELLI" ...
##  $ ELETTORI         : int  2288 1785 831 939 8454 686 291 4194 1148 495 ...
##  $ ELETTORI_M       : int  1101 861 402 453 4121 344 139 2044 563 247 ...
##  $ VOTANTI          : int  1496 1241 617 612 5860 467 187 2776 760 292 ...
##  $ VOTANTI_M        : int  775 632 328 304 3006 239 97 1411 397 155 ...
##  $ NUMVOTISI        : int  533 442 241 194 1952 168 72 739 258 77 ...
##  $ NUMVOTINO        : int  953 782 366 410 3836 297 112 2015 482 203 ...
##  $ NUMVOTIBIANCHI   : int  2 3 6 1 45 2 1 7 9 9 ...
##  $ NUMVOTINONVALIDI : int  8 14 4 7 27 0 2 15 11 3 ...
##  $ NUMVOTICONTESTATI: int  0 0 0 0 0 0 0 0 0 0 ...

Step 2 - Analyze the data at regional level

# Regional totals: add up every count column over the municipalities of
# each region
mydata <- aggregate(
  x = refdata[, 4:12],
  by = list(Regione = refdata$DESCREGIONE),
  FUN = sum
)
# Valid votes: voters minus blank and invalid ballots
mydata$VotiTotal <- with(
  mydata,
  VOTANTI - (NUMVOTIBIANCHI + NUMVOTINONVALIDI)
)
# Valid votes as a share of the electors, then the SI / NO shares of the
# valid votes
mydata$PercVoti <- with(mydata, round(100 * VotiTotal / ELETTORI, digits = 1))
mydata$PercSI <- with(mydata, round(100 * NUMVOTISI / VotiTotal, digits = 2))
mydata$PercNO <- with(mydata, round(100 * NUMVOTINO / VotiTotal, digits = 2))
# Rank the regions from the highest to the lowest NO share and show the
# key columns
mydatareg <- mydata[order(mydata$PercNO, decreasing = TRUE), ]
mydatareg[, c("Regione", "ELETTORI", "VotiTotal",
              "PercVoti", "PercSI", "PercNO")]
##                  Regione ELETTORI VotiTotal PercVoti PercSI PercNO
## 14              SARDEGNA  1375735    854091     62.1  27.78  72.22
## 15               SICILIA  4031871   2262909     56.1  28.40  71.59
## 4               CAMPANIA  4566905   2667540     58.4  31.48  68.52
## 13                PUGLIA  3280712   2008026     61.2  32.84  67.16
## 3               CALABRIA  1553741    837967     53.9  32.96  67.03
## 2             BASILICATA   467000    290017     62.1  34.11  65.89
## 1                ABRUZZO  1052049    716202     68.1  35.60  64.39
## 7                  LAZIO  4402145   3023245     68.7  36.67  63.32
## 20                VENETO  3725400   2835078     76.1  38.04  61.95
## 6  FRIULI-VENEZIA GIULIA   952494    685133     71.9  39.03  60.97
## 11                MOLISE   256600    162434     63.3  39.21  60.78
## 8                LIGURIA  1241469    858552     69.2  39.91  60.08
## 19         VALLE D'AOSTA    99735     70685     70.9  43.25  56.75
## 12              PIEMONTE  3396378   2423617     71.4  43.53  56.47
## 9              LOMBARDIA  7480375   5511544     73.7  44.51  55.49
## 10                MARCHE  1189181    858550     72.2  44.93  55.07
## 18                UMBRIA   675610    492268     72.9  48.82  51.17
## 5         EMILIA-ROMAGNA  3326910   2505525     75.3  50.39  49.61
## 16               TOSCANA  2854129   2105831     73.8  52.51  47.49
## 17   TRENTINO-ALTO ADIGE   792504    566803     71.5  53.87  46.13

Step 3 - Barplot of regional data

# Stacked bars, one per region in ranked order: the NO share is the bottom
# segment and the SI share sits on top of it
barplot(
  height = t(as.matrix(mydatareg[, c("PercNO", "PercSI")])),
  names.arg = mydatareg$Regione,
  legend.text = TRUE,
  args.legend = list(x = "topright", cex = 0.9, inset = c(0.1, -0.1)),
  col = c("skyblue", "darkorange"),
  axisnames = TRUE,
  las = 2,
  cex.names = 0.6
)
# Red reference line: NO share of the valid votes over all regions together
hl <- round(100 * sum(mydata$NUMVOTINO) / sum(mydata$VotiTotal), digits = 2)
abline(h = hl, col = "red", lwd = 2)

Step 4 - Analyze the data at provincial level

# Provincial totals: add up every count column over the municipalities of
# each province
mydata <- aggregate(
  x = refdata[, 4:12],
  by = list(Provincia = refdata$DESCPROVINCIA),
  FUN = sum
)

# Valid votes: voters minus blank and invalid ballots
mydata$VotiTotal <- with(
  mydata,
  VOTANTI - (NUMVOTIBIANCHI + NUMVOTINONVALIDI)
)
# Valid votes as a share of the electors, then the SI / NO shares of the
# valid votes
mydata$PercVoti <- with(mydata, round(100 * VotiTotal / ELETTORI, digits = 1))
mydata$PercSI <- with(mydata, round(100 * NUMVOTISI / VotiTotal, digits = 2))
mydata$PercNO <- with(mydata, round(100 * NUMVOTINO / VotiTotal, digits = 2))
# In how many provinces did NO get a larger share than SI?
mydata$NOwins <- mydata$PercNO > mydata$PercSI
table(mydata$NOwins)
## 
## FALSE  TRUE 
##    12    94
# In how many provinces did NO reach at least 60% of the valid votes?
mydata$NOg60 <- mydata$PercNO >= 60
table(mydata$NOg60)
## 
## FALSE  TRUE 
##    51    55
# Rank the provinces from the highest to the lowest NO share
mydataprv <- mydata[order(mydata$PercNO, decreasing = TRUE), ]
# The ten provinces at the top of the ranking (highest NO share)
areaH <- mydataprv[
  seq_len(10),
  c("Provincia", "ELETTORI", "VotiTotal", "PercVoti", "PercSI", "PercNO")
]
areaH
##        Provincia ELETTORI VotiTotal PercVoti PercSI PercNO
## 23       CATANIA   884612    512421     57.9  25.44  74.55
## 62      ORISTANO   136930     84248     61.5  26.02  73.98
## 19      CAGLIARI   659314    418595     63.5  26.22  73.77
## 64       PALERMO  1011728    555583     54.9  27.53  72.46
## 88      SIRACUSA   322752    181724     56.3  28.10  71.90
## 22       CASERTA   713441    419647     58.8  28.31  71.69
## 61         NUORO   176886    104511     59.1  28.75  71.25
## 20 CALTANISSETTA   216007    114994     53.2  28.86  71.14
## 59        NAPOLI  2402977   1348869     56.1  29.62  70.37
## 1      AGRIGENTO   354596    185410     52.3  29.70  70.30
# The ten provinces with the lowest NO share, weakest first. The rows are
# taken from the actual end of the ranking rather than from a hard-coded
# row count, so the selection stays correct if the number of provinces in
# the data changes.
areaL <- mydataprv[
  rev(tail(seq_len(nrow(mydataprv)), 10)),
  c("Provincia", "ELETTORI", "VotiTotal", "PercVoti", "PercSI", "PercNO")
]
areaL
##        Provincia ELETTORI VotiTotal PercVoti PercSI PercNO
## 16       BOLZANO   386092    257262     66.6  63.69  36.31
## 34       FIRENZE   748871    577313     77.1  57.71  42.29
## 87         SIENA   204024    154195     75.6  57.18  42.82
## 75         PRATO   180243    132684     73.6  55.72  44.28
## 5         AREZZO   263101    194847     74.1  54.07  45.92
## 57        MODENA   514584    392424     76.3  53.06  46.94
## 77       RAVENNA   294016    222447     75.7  52.82  47.18
## 15       BOLOGNA   758588    580142     76.5  52.30  47.70
## 72       PISTOIA   225615    164471     72.9  52.14  47.86
## 36 FORLI'-CESENA   298785    227359     76.1  51.88  48.12

Step 5 - Barplot of provincial data

library(ggplot2)
library(reshape2)
# Long-format table of the SI / NO shares for the ten provinces with the
# highest and the ten with the lowest NO share. The bottom rows are taken
# from the actual end of the ranking instead of a hard-coded row count.
area <- mydataprv[
  c(seq_len(10), rev(tail(seq_len(nrow(mydataprv)), 10))),
  c("Provincia", "ELETTORI", "VotiTotal", "PercVoti", "PercSI", "PercNO")
]
area <- melt(
  data = area,
  id.vars = "Provincia",
  measure.vars = c("PercSI", "PercNO")
)
# Keep only the ten provinces with the highest NO share (both their SI and
# NO rows), selected by name rather than by row position in the melted table
ggdf <- area[area$Provincia %in% mydataprv$Provincia[seq_len(10)], ]
gh <- ggplot(ggdf, aes(Provincia, value, fill = variable, label = value))
# The bars are stacked, so the labels must be stacked as well: vjust = 0.5
# centres each percentage inside the segment it describes instead of
# drawing it at its raw value.
gh <- gh + geom_bar(stat = "identity") +
  geom_text(size = 3, position = position_stack(vjust = 0.5))
gh <- gh + labs(y = "Vote percentage", title = "Highest results for NO")
gh


Step 6 - Plot of the total results

# Reshape the regional table to long format: one row per region and answer,
# holding the absolute SI / NO vote counts. Note that this replaces the wide
# `mydatareg` table with its long version.
mydatareg <- melt(
  data = mydatareg,
  id.vars = "Regione",
  measure.vars = c("NUMVOTISI", "NUMVOTINO")
)
# A single full-width stacked bar drawn in polar coordinates, i.e. a pie
# chart of the overall SI and NO vote counts
p <- ggplot(mydatareg, aes(x = factor(1), y = value, fill = variable)) +
  geom_bar(stat = "identity", width = 1) +
  xlab("") +
  coord_polar(theta = "y")
p