Following up from my IPython notebook (http://nbviewer.ipython.org/gist/stevejburr/17f427362fcd18aa568a), where I downloaded data from the website of the Daily Telegraph and analysed how non-voters could have impacted the outcome of the 2015 UK general election, here I use R to plot the same data on a map.
I got hold of shapefiles of the UK electoral constituencies from the OS website.
My first stage is to set up the required packages and read in the data:
setwd("~/IPython Notebooks")
library(raster)
## Warning: package 'raster' was built under R version 3.1.3
## Warning: package 'sp' was built under R version 3.1.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.1.1
# Load the per-constituency vote counts (CSV written by the Python script)
vote.data <- read.csv(file = "out.csv")
# Load the constituency boundary shapefile used as the map layer
polygons <- shapefile(x = "westminster_const_region.shp")
Some of the names of parties look odd after importing the file (due to spaces etc.)
Sort these out:
# Tidy up the names: overwrite the three party columns whose headers were
# mangled on import (positions 8, 12 and 15), leaving every other name as is.
original.names <- replace(
  names(vote.data),
  c(8, 12, 15),
  c("Mebyon.Kernow", "Plaid.Cymru", "UKIP")
)
names(vote.data) <- original.names
In order to link the data from Python to the map, both files need to have matching names.
The next step is therefore to apply the same regex logic used in Python to the constituency labels in the map data, and then tweak anything which still doesn’t match up:
# Normalise the constituency labels in the map data so they can be matched
# against the names in `vote.data`.
# (The party column names were already tidied in the previous chunk; that
# step is idempotent, so it is not repeated here.)
#
# Steps, applied to a plain character vector and written back once:
#   1. strip the " Boro Const" / " Co Const" / " Burgh Const" suffixes
#   2. make lower case
#   3. rename "... and furness" to "... in furness"
#   4. remove commas, full stops and parentheses
#   5. turn spaces and hyphens into underscores
const.names <- polygons$NAME
const.names <- gsub(" (Boro|Co|Burgh) Const", "", const.names)
const.names <- tolower(const.names)
const.names <- gsub(" and furness", " in furness", const.names, fixed = TRUE)
# A character class replaces the earlier "\\(|" / "\\)|" patterns, whose
# trailing "|" added an unintended empty alternative to the regex.
const.names <- gsub("[,.()]", "", const.names)
# "-" is placed last in the class so it is read as a literal hyphen
const.names <- gsub("[ -]", "_", const.names)
polygons$NAME <- const.names
First plot the results as they actually were (note that there are blank/missing constituencies in the data that I downloaded due to errors in the website):
# Produce actual results map ----
# Reads `vote.data` and `polygons`; leaves behind `vote.mat`, `tomerge`,
# `polygons$Winner`, `LookUpTable` and `colRegions` for the later chunks.

# Keep only the vote-count columns: drop column 10 and then the first column
# (the constituency name, which is reused as the join key below).
# NOTE(review): column 10 is presumably "Non.Voters" - confirm against out.csv.
vote.mat <- as.matrix(vote.data[-10][-1])

# Make numeric. Converting the storage mode covers the whole matrix;
# indexing `vote.mat[i]` for i in 1:13 only reaches the first 13 cells.
storage.mode(vote.mat) <- "double"

# Create a dataframe with the name of the constituency and the winner.
# The winner is the column holding the row maximum; ties.method = "first"
# keeps the result reproducible (the default breaks ties at random).
winner <- colnames(vote.mat)[max.col(vote.mat, ties.method = "first")]
tomerge <- data.frame(
  NAME = as.character(vote.data[[1]]),
  Winner = winner,
  stringsAsFactors = FALSE
)
# One row per map polygon; polygons without vote data get Winner = NA
tomerge <- left_join(
  data.frame(NAME = polygons$NAME, stringsAsFactors = FALSE),
  tomerge,
  by = "NAME"
)

# Add the "Winner" to the map dataframe with a single vectorised lookup.
# Storing it as a factor means its levels are exactly the parties present,
# which is the order `col.regions` has to follow when plotting.
polygons$Winner <- factor(tomerge$Winner[match(polygons$NAME, tomerge$NAME)])

# Create a look up of different colors so everything is coloured correctly
# when not all the parties win a seat.
# This is important for the "what if" scenarios explored later.
# Colours are assigned by position to the alphabetically sorted winners, so
# the number of winning parties must equal the number of colours.
party.colours <- c("blue", "green", "red", "orange", "pink", "yellow", "purple")
# sort() drops NA, so missing constituencies never get their own color
winning.parties <- sort(unique(as.character(tomerge$Winner)))
if (length(winning.parties) != length(party.colours)) {
  stop(
    "Expected ", length(party.colours), " winning parties but found ",
    length(winning.parties), "; update the colour list to match.",
    call. = FALSE
  )
}
LookUpTable <- data.frame(
  WINNER = winning.parties,
  Colours = party.colours,
  stringsAsFactors = FALSE
)

# Create a vector which has the colors of the matched parties:
colRegions <- LookUpTable$Colours[
  match(levels(polygons$Winner), LookUpTable$WINNER)
]
# Put the plot in its own chunk
#spplot(polygons,"Winner", col.regions = colRegions, main= "Actual Results")
The resulting plot is as follows (white represents missing data due to errors):
The next step is to loop through a selection of parties to see what this map would look like if that party had managed to get all non-voters to vote for them:
# Finally add a loop to show Con/Lab/Lib.Dem/UKIP/Green ----
# For each party, give it every non-voter in every constituency, recompute
# the winners and build a map. Relies on `vote.data`, `polygons` and
# `LookUpTable` from the earlier chunks. Each plot is stored in `plotlist`
# and also assigned to Plot1..Plot5 so it can be printed in its own chunk.
parties <- c("Cons", "Lab", "Lib.Dem", "UKIP", "Green")
plotlist <- list()
for (i in seq_along(parties)) {
  vote.mat <- as.matrix(vote.data[-1])
  storage.mode(vote.mat) <- "double"

  # Add the non-voters to THIS party's own votes. Indexing with parties[i]
  # (not parties[1]) stops every scenario from starting from the
  # Conservative count.
  this.party <- vote.mat[, parties[i]]
  non.voters <- vote.mat[, "Non.Voters"]
  vote.mat[, parties[i]] <- this.party + non.voters

  # "Non.Voters" is not a party, so it must not be eligible to win a seat
  candidates <- vote.mat[, colnames(vote.mat) != "Non.Voters", drop = FALSE]
  # ties.method = "first" keeps the result reproducible (default is random)
  winner <- colnames(candidates)[max.col(candidates, ties.method = "first")]

  tomerge <- data.frame(
    NAME = as.character(vote.data[[1]]),
    Winner = winner,
    stringsAsFactors = FALSE
  )
  # One row per map polygon; polygons without vote data get Winner = NA
  tomerge <- left_join(
    data.frame(NAME = polygons$NAME, stringsAsFactors = FALSE),
    tomerge,
    by = "NAME"
  )

  # Vectorised lookup; factor levels are exactly the parties present, which
  # is the order `col.regions` has to follow.
  polygons$Winner <- factor(tomerge$Winner[match(polygons$NAME, tomerge$NAME)])
  colRegions <- as.vector(
    LookUpTable$Colours[match(levels(polygons$Winner), LookUpTable$WINNER)]
  )

  nam <- paste0("Plot", i)
  plotlist[[nam]] <- spplot(
    polygons, "Winner",
    col.regions = colRegions,
    main = paste("Results if non voters all picked", parties[i])
  )
  assign(nam, plotlist[[nam]])
}
Add each of the plots separately:
From visual inspection we can now see that both the SNP and Labour have a number of “core” seats which do not change hands even in these extreme scenarios, whereas the majority of the country is up for grabs in the event of dramatic non-voter engagement.