---
title: "London Homicides 2008 to 2020"
author: "David Harrington"
output:
flexdashboard::flex_dashboard:
social: menu
source_code: embed
---
```{r global, include=FALSE}
library(flexdashboard)
library(knitr)
library(readxl)
library(ggplot2)
library(dplyr)
library(tidyr)
library(tibble)
library(stringr)
library(lubridate)
library(leaflet)
library(leaflet.extras)
library(tidyverse)
library(reshape)
library(ggridges)
library(rgeos)
library(ggmap)
library(maptools)
library(broom)
library(mapproj)
library(htmlwidgets)
library(htmltools)
library(plotly)
# data import
library(readxl)
mmap18 <- read_excel("C:/Users/david/OneDrive - RMIT University/Data Visualisation and Communication/London Homicides Project/mmap.xlsx")
# clean up the orginal data set and eliminate redundant variables
library(dplyr)
mmap18 = select(mmap18, -6,-7,-8,-13,-14,-16,-17,-18,-19)
# reorder columns
mmap18 <- mmap18[ , c(2, 1, 8, 9,5,4,3,10,6,11,7)]
# update column names
colnames(mmap18) <- c("date", "ID", "latitude", "longitude", "ethnicity", "sex", "age", "ageGroup", "weapon", "borough", "status")
# change strings to factors
mmap18 <- mmap18 %>% mutate_if(is.character, as.factor)
# import the 2019 murder map data into R
mmap19 <- read_excel("C:/Users/david/OneDrive - RMIT University/Data Visualisation and Communication/London Homicides Project/Murdermap2019.xlsx")
mmap19 = select(mmap19, -3,-6)
mmap19$longitude <- as.numeric(mmap19$longitude)
# change strings to factors
mmap19 <- mmap19 %>% mutate_if(is.character, as.factor)
colnames(mmap19) <- c("date", "ID", "latitude", "longitude", "ethnicity", "sex", "age", "ageGroup", "weapon", "borough", "status")
# import the 2019 murder map data into R
mmap20 <- read_excel("C:/Users/david/OneDrive - RMIT University/Data Visualisation and Communication/London Homicides Project/Murdermap2020.xlsx")
mmap20 = select(mmap20, -3,-6)
mmap20 <- mmap20 %>% mutate_if(is.character, as.factor)
colnames(mmap20) <- c("date", "ID", "latitude", "longitude", "ethnicity", "sex", "age", "ageGroup", "weapon", "borough", "status")
##Merge mmap19 and mmap 20 using rbind function
mmap19_20 = rbind(mmap19,mmap20)
#change the levels for "sex" to align with mmap18
mmap19_20 <- mmap19_20 %>%
mutate(sex = fct_recode(sex,
"M" = "Male",
"F" = "Female"))
# merge mmap19_20 with mmap18 to create mmap (integrated cases from 2008 to 2020)
mmap = rbind(mmap18, mmap19_20)
# See all levels in weapon variable
levels(mmap$weapon)
# Replicate weapon column to enable re-factoring
mmap2 = cbind(mmap, weapon2=rep(mmap$weapon))
# Change levels in Weapon column to include just 4 levels Knife, Gun, Assault and Other
mmap3 <- mmap2 %>% mutate( weapon = fct_recode(weapon,
"Other" = "Arson",
"Other" = "Vehicle",
"Other" = "Drug",
"Other" = "Poison",
"Other" = "Unknown",
"Other" = "Ligature",
"Other" = "Strangulation",
"Other" = "Fire",
"Other" = "Other",
"Other" = "Blunt Object",
"Assault" = "None"))
# clean up the factor names within borough variable to enable the bind with boroughpop (ensure they are exactly the same!)
mmap33 <- mmap3 %>% mutate(borough = fct_recode(borough,
"Westminster" = "City of Westminster",
"Kingston upon Thames" = "Kingston-upon-Thames",
"Kingston upon Thames" = "Kingston Upon Thames",
"Kingston upon Thames" = "Kingston-Upon-Thames",
"Richmond upon Thames" = "Richmond"))
# Replicate date column
mmap4 = cbind(mmap33, date2=rep(mmap3$date))
# create new data frame mmap5 and separate date variable from mmap4 into year, month and day and save them as integers
mmap5 = separate(mmap4, date2, c("year", "month", "day"), "-") %>%
mutate_if(is.character, as.integer)
mmap5 <- mmap5 %>% mutate(status2 = fct_recode(status,
"Awaiting Trial" = "Awaiting trial",
"Awaiting Trial" = "Awaiting Outcome",
"Solved" = "Solved",
"Solved" = "Self Defence",
"Unsolved" = "Suspect dead",
"Unsolved" = "Unsolved"))
mmap5 <- mmap5 %>% mutate(ethnicity = fct_recode(ethnicity,
"Other" = "Mixed",
"Black" = "Black",
"Black" = "Black or Black British",
"White" = "White",
"White" = "White or White British",
"Asian" = "Asian",
"Asian" = "Asian or Asian British",
"Other" = "Unknown",
"Other" = "NA",
"Other" = "Any Other Ethnic Appearance"))
levels(mmap5$ethnicity)
# read the boroughpop file into R to access borough population data
boroughpop <- read_excel("boroughpop.xlsx")
# change column names in boroughpop
colnames(boroughpop) = c("borough", "population")
# join mmap5 and boroughpop data frames using "borough"
mmap6 <- left_join(mmap5, boroughpop, by = "borough")
# make bourough a factor variable
mmap6$borough <- as.factor(mmap6$borough)
# create new variable for counting homicides by borough and homicides per 100,000 population by borough
mmap6_sum <- mmap6 %>% select(borough, population) %>% group_by(borough) %>% mutate(count = n(), population = population, homicide_rate = (count*100000)/11/population) %>% distinct()
# create mmap 7 by filtering mmap5 to just include full year data to end of calendar 2019
mmap7 <- mmap6 %>% filter(year != "2020")
mmap7 <-data.frame(mmap7)
# remove NA cases for ethicity
mmap8 <- mmap7 %>% filter(ethnicity != "NA")
# save mmap6_sum as data frame and name the data frame mmap9
mmap9 <- as.data.frame(mmap6_sum)
# create new data frame mmap10 with just borough and homicide rate variables
mmap10 <- mmap9 %>% select(borough, homicide_rate)
# round the homicide_rate variable to 2 decimal places
mmap10$homicide_rate <- round(mmap10$homicide_rate, 2)
# arrange the data frame based on homicide rate variable descending order
mmap10 <- mmap10 %>% arrange(desc(mmap10$homicide_rate))
mmap7$year <- as.factor(mmap7$year)
# calculate sum borough population based on the 2016 ONS data for boroughs
GreaterLondonPop <- sum(boroughpop$population)
GreaterLondonPop
# create vector for Greater London Population for 2008 to 2019
GreaterLondonPopMillions <- c(7.8, 7.9, 8.0, 8.15, 8.25, 8.35, 8.5, 8.7, 8.82, 8.85, 8.9, 9.0)
# Greater London Population in 100,000s
GreaterLondon <- GreaterLondonPopMillions*10
# Greater London total homicides
LondonHomicides <- c(155, 130, 128, 121, 103, 113, 90, 120, 107, 130, 131, 149)
Year <- c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019)
# merge vectors into a data frame
London <- data.frame(Year, LondonHomicides, GreaterLondon)
London
# Number of homicides per 100,000 population, 2008-2019
LondonHomicideRate <- London %>% mutate(Homicide_Rate = LondonHomicides/GreaterLondon)
LondonHomicideRate$Homicide_Rate <- round(LondonHomicideRate$Homicide_Rate,2)
LondonHomicideRate$Year <- as.factor(LondonHomicideRate$Year)
str(LondonHomicideRate)
sex <- table(mmap5$sex) # Victim gender table
addmargins(sex) # Plus totals
# create new data frame to show ageGroup data by year
ageyear <- mmap7 %>%
select(ageGroup, year) %>%
group_by(ageGroup, year) %>%
tally() %>%
spread(ageGroup, n)
# calculate total homicides per ageGroup across 2008 to 2019
homicidesA <- sum(ageyear$`A. Child 0-6`, na.rm= TRUE)
homicidesB <- sum(ageyear$`B. Child 7-12`, na.rm= TRUE)
homicidesC <- sum(ageyear$`C. Teen 13-16`, na.rm= TRUE)
homicidesD <- sum(ageyear$`D. Teen 17-19`, na.rm= TRUE)
homicidesE <- sum(ageyear$`E. Adult 20-24`, na.rm= TRUE)
homicidesF <- sum(ageyear$`F. Adult 25-34`, na.rm= TRUE)
homicidesG <- sum(ageyear$`G. Adult 35-44`, na.rm= TRUE)
homicidesH <- sum(ageyear$`H. Adult 45-54`, na.rm= TRUE)
homicidesI <- sum(ageyear$`I. Adult 55-64`, na.rm= TRUE)
homicidesJ <- sum(ageyear$`J. Adult 65 over`, na.rm= TRUE)
# consolidate total homicides per ageGroup across 2008 to 2019 into a new vector homicides_ageGroup
homicides_ageGroup <- c(homicidesA, homicidesB, homicidesC, homicidesD, homicidesE, homicidesF, homicidesG, homicidesH, homicidesI, homicidesJ)
# create vector agegp with unique values for the variable "ageGroup" in mmap5
agegp <- unique(mmap5$ageGroup)
# refactor to order the values for the variable "ageGroup" from youngest to oldest
agegp <- factor(c("A. Child 0-6", "B. Child 7-12", "C. Teen 13-16", "D. Teen 17-19", "E. Adult 20-24", "F. Adult 25-34","G. Adult 35-44","H. Adult 45-54","I. Adult 55-64","J. Adult 65 over"))
# create vector to calculate average homicides per year over 11 years from 2008 to 2019
homicides_ageGroup_avg <- homicides_ageGroup/11
# create population vector by age group - Source: ONS small area population estimates mid-2016 (latest ONS survey for UK)
popn <- c(872040, 659384, 374989, 278134, 557975, 1654605, 1408946, 1151408, 828359, 1039161)
# create homicide-rate2 vector
homicide_rate2 <- homicides_ageGroup_avg / popn * 100000
# Create a data frame from vectors
mmap11 <- data.frame(agegp, round(homicide_rate2,2))
colnames(mmap11) <- c("Age_Group", "Homicide_Rate")
# calculate mean and median age for all homicides in London 2008-2020
meanAge <- mean(mmap5$age)
meanAge
medianAge <- median(mmap5$age)
medianAge
# Trends in Status
## create tables showing probability of being solved given weapon, ethnicity and borough
# by weapon type
status <- table(mmap5$weapon, mmap5$status, dnn = c("weapon", "status"))
status
statusProp <- prop.table(status, 1)
statusProp
# save as data frame
weaponStatus <- data.frame(statusProp)
# filter to just include probability of being solved
weaponStatus1 <- weaponStatus %>% filter(status == "Solved")
# change to percent
weaponStatus1$percent <- round(weaponStatus1$Freq,2)*100
mmap13 <- weaponStatus1 %>% arrange(weaponStatus1$percent)
mmap13$percent <- as.integer(mmap13$percent)
# by ethnicity
levels(mmap8$ethnicity)
status2 <- table(mmap8$ethnicity, mmap8$status, dnn = c("ethnicity", "status"))
status2
status2Prop <- round(prop.table(status2, 1),2)
status2Prop
# save as data frame
ethnicityStatus <- data.frame(status2Prop)
# filter to just include probability of being solved
ethnicityStatus1 <- ethnicityStatus %>% filter(status == "Solved")
# change to percent
ethnicityStatus1$percent <- round(ethnicityStatus1$Freq,2)*100
mmap14 <- ethnicityStatus1 %>% arrange(ethnicityStatus1$percent)
# by ageGroup
levels(mmap8$ageGroup)
status4 <- table(mmap8$ageGroup, mmap8$status, dnn = c("ageGroup", "status"))
status4
status4Prop <- round(prop.table(status4, 1),2)
status4Prop
# save as data frame
AgeGroupStatus <- data.frame(status4Prop)
# filter to just include probability of being solved
AgeGroupStatus1 <- AgeGroupStatus %>% filter(status == "Solved")
# change to percent
AgeGroupStatus1$percent <- round(AgeGroupStatus1$Freq,2)*100
# by location
status3 <- table(mmap5$borough, mmap5$status, dnn = c("borough", "status"))
status3
status3Prop <- prop.table(status3, 1)
status3Prop
# save as data frame
boroughStatus <- data.frame(status3Prop)
# filter to just include probability of being solved
boroughStatus1 <- boroughStatus %>% filter(status == "Solved")
# change to percent
boroughStatus1$percent <- round(boroughStatus1$Freq,2)*100
# Install ggplot2 mapping packages
library(rgeos)
library(ggmap)
library(maptools)
library(broom)
library(mapproj)
# create Leaflet interactive cloropeth chart for Homicide Rate
# read shp file for just London boroughs
London_Ward2 <- readShapeSpatial("London_Borough_Excluding_MHW.shp")
# convert coordinates into lat long degrees
proj4string(London_Ward2) <- CRS("+init=epsg:27700")
London_Ward2.wgs84 <- spTransform(London_Ward2, CRS("+init=epsg:4326"))
library(leaflet)
# For Leaflet choropleth maps, we need to use a SpatialPolygonDataFrame
class(London_Ward2.wgs84)
names(London_Ward2.wgs84)
# Rename the borough variable in mmap10 to "NAME" to match the "SpatialPolygonsDataFrame" file London_Ward2.wgs84
names(mmap10)[names(mmap10) == "borough"] <- "NAME"
str(mmap10)
mmap10 <- mmap10 %>% mutate(NAME = fct_recode(NAME,
"City of London" = "City and County of the City of London",
"Westminster" = "City of Westminster"))
levels(mmap10$NAME)
# merge mmap data into London_Ward.wgs84 SpatialPolygonsDataFrame using "DISTRICT"
merge.mmap10 <- sp::merge(London_Ward2.wgs84, mmap10, by = "NAME", duplicateGeoms = TRUE)
# create a discrete colour scale based on quantiles for five levels
bins <- quantile(
mmap10$homicide_rate,
probs = seq(0,1,.2), names = FALSE, na.rm = TRUE)
bins
```
Summary {data-orientation=Column}
=======================================================================
Column {data-width=500}
-----------------------------------------------------------------------
### After decling from 2008, homicides increased after 2014
```{r}
p2 <- ggplot(mmap7, aes(x = year)) +
geom_bar(stat='count', fill = "navyblue", color = "black") +
labs(x = "Year", y = "Number of Homicides", size = 3)+
theme_bw()
ggplotly(p2)
```
### The homicide rate per 100,000 population has also increased since 2014
```{r}
p4 <- ggplot(LondonHomicideRate, aes(x=Year, y=Homicide_Rate)) +
geom_bar(stat="identity", fill="navyblue", color = "black")+
labs(x = "Year", y = "Rate", size = 3)
ggplotly(p4)
```
```
```
Column {data-width=600}
-----------------------------------------------------------------------
### Homicide rates per 100,000 population are highest in the centre of London (average for 2008 to 2020)
```{r}
pal <- colorBin(
"YlOrRd",
domain = mmap10$homicide_rate,
bins = bins
)
# add the colour scale to the choropleth map.
labels <- sprintf(
"%s
%g homicide_rate",
merge.mmap10$NAME,
merge.mmap10$homicide_rate
) %>% lapply(htmltools::HTML)
title <- tags$div(
HTML('London Homicides 2008-2020
')
)
p3 <- leaflet(merge.mmap10) %>%
setView(lng = -0.118092 , lat = 51.509865, zoom = 10)
p3 %>% addPolygons(
fillColor = ~pal(homicide_rate),
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
fillOpacity = 0.7,
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto")) %>%
addLegend(pal = pal,
values = ~homicide_rate,
opacity = 0.7, title = "Homicide Rate",
position = "bottomright") %>%
addControl(title, position = "topright")
```
Column
-----------------------------------------------------------------------
### The median victim age for all homicides is 30 (2008 to 2020)
```{r}
p6 <- ggplot(mmap5, aes(x = age))+
geom_density(fill = "red", alpha = 0.5)+
geom_histogram(colour = "white", fill = "navyblue", aes(age, ..density..), alpha = 0.5, bins = 100)+
labs(x = "Victim Age", y = "Proportion of Homicides")+
geom_vline(xintercept= median(mmap7$age)) +
annotate("text",label = "Mean = 34.6",x = 43, y = 0.04, colour = "red") +
geom_vline(xintercept= mean(mmap7$age),linetype=2) +
annotate("text",label = "Median = 30.0",x = 39, y = 0.035, colour = "red")+
scale_x_continuous(limits = c(0, 100))+
theme_bw()
ggplotly(p6)
```
### The number of black victims of homicide has grown sharply since 2014 (2008-2019)
```{r}
p16 <- ggplot(mmap8, aes(x = year, fill = ethnicity)) +
geom_density(stat='count', alpha = 0.6, colour = "black") +
scale_x_continuous(breaks = c(2008:2019)) +
scale_fill_brewer(palette = "Set1") +
labs(x = "Calendar Year", y = "Number of Homicides", fill = "legend")+
theme_bw()
ggplotly(p16)
```
### Death by a knife is the largest and most rapdily growing cause of death (2008-2020)
```{r}
p17 <- ggplot(mmap8, aes(x = year, fill = weapon)) +
geom_density(stat='count', alpha = 0.6, colour = "black") +
scale_x_continuous(breaks = c(2008:2020)) +
scale_fill_brewer(palette = "Set1") +
labs(x = "Calendar Year", y = "Number of Homicides", fill = "legend")+
theme_bw()
ggplotly(p17)
```
```
```
By Gender and Age {data-orientation=Column}
=======================================================================
Column {data-width=450}
-----------------------------------------------------------------------
### Male victims of homicide are younger (2008-2020)
```{r}
p9 <- ggplot(mmap7, aes(x = age, fill = sex)) +
geom_density(alpha = 0.8)+
scale_fill_brewer(palette = "Set1")+
labs(fill = "Legend")+
scale_x_continuous("Victim Age")+
scale_y_continuous("Proportion of Homicides")+
theme_bw()
ggplotly(p9)
```
### There is a concentration of Male victims younger ages (2008-2020)
```{r}
p10 <- ggplot(data = mmap5, aes(x = sex, y = age)) + geom_boxplot(outlier.shape = NA) + geom_jitter(alpha = 1/5) +
ylab("age of victim") +
stat_summary(fun.y="mean", colour="red", geom="point",shape = 16, size = 4) +
theme_minimal()
ggplotly(p10)
```
```
```
Column {data-width=500}
-----------------------------------------------------------------------
### There are larger concentrations of female victims near the centre of London (2008 - 2020)
```{r}
femaleVictims <- mmap5 %>%
filter(sex == 'F')
femaleVictims %>% # Map showing all point data
leaflet() %>%
setView(lng =-0.1, lat=51.51, zoom=10) %>%
addProviderTiles(providers$OpenStreetMap) %>%
addScaleBar %>%
addCircleMarkers(lng = ~longitude, lat = ~latitude, popup = ~paste0("", ID, "", "
", date, "
", age, "
", sex, "
", weapon, "
", borough), radius = 5, clusterOptions = markerClusterOptions)
```
```
```
Column
-----------------------------------------------------------------------
### A higher proportion of black homicides are young (2008-2019)
```{r}
p13 <- ggplot(mmap8, aes(x = age, fill = ethnicity)) +
geom_density(alpha = 0.6)+
scale_fill_brewer(palette = "Set1")+
labs(fill = "Legend")+
scale_x_continuous("Victim Age")+
scale_y_continuous("Proportion of Ethnicity")+
theme_bw()
ggplotly(p13)
```
### 77% of all homicide victims in London are male (2008-2020)
```{r}
p18 <- ggplot(mmap8, aes(x = year, fill = sex)) +
geom_density(stat='count', alpha = 0.6, colour = "black") +
scale_x_continuous(breaks = c(2008:2020)) +
scale_fill_brewer(palette = "Set1") +
labs(x = "Calendar Year", y = "Number of Homicides", fill = "legend")+
theme_bw()
ggplotly(p18)
```
```
```
By Ethnicity and Cause of Death {data-orientation=Column}
=======================================================================
Column {data-width=450}
-----------------------------------------------------------------------
### Black victims of homicide are younger than White, Asian or other ethnicities (2008-2019)
```{r}
p14 <- ggplot(data = mmap8, aes(x = ethnicity, y = age), na.rm = TRUE)+ geom_boxplot(outlier.shape = NA) + geom_jitter(alpha = 1/5) +
ylab("age of victim") +
stat_summary(fun.y="mean", colour="red", geom="point",shape = 16, size = 4) +
theme_minimal()
ggplotly(p14)
```
### Homicides involving black victims are less likely to get solved (2008-2020)
```{r}
p23 <- ggplot(mmap14, aes(x= reorder(ethnicity, percent), y=percent))+
geom_bar(stat="identity", fill="navyblue", color = "black")+
coord_flip()+
labs(x = "Ethnicity", y = "Percent Solved")+
theme_bw()
ggplotly(p23)
```
```
```
Column {data-width=500}
-----------------------------------------------------------------------
### Location of homicides by ethnicity of victim (2008-2020)
```{r}
pal2 <- colorFactor(palette = c("darkgreen", "purple", "blue", "red"),
levels = c("Black", "White", "Asian", "Other")) # Creating a 4 colour palette
black <- mmap5 %>%
filter(ethnicity == "Black") # Grouping layers - black
white <- mmap5 %>%
filter(ethnicity == "White") # Grouping layers - white
asian <- mmap5 %>%
filter(ethnicity == "Asian") # Grouping layers - asian
other <- mmap5 %>%
filter(ethnicity == "Other") # Grouping layers - other
mmap5 %>%
leaflet() %>%
setView(lng=-0.1, lat=51.51, zoom=10) %>%
addProviderTiles("CartoDB") %>%
addScaleBar(position = "bottomleft") %>%
addCircleMarkers(data = black,
lng = ~longitude,
lat = ~latitude,
radius = 2,
color = ~pal2(ethnicity),
group = "Black",
popup = ~paste("", ID, "", "
", date, "
", age, "
", sex, "
", weapon, "
", status)) %>%
addCircleMarkers(data = white,
lng = ~longitude,
lat = ~latitude,
radius = 2,
color = ~pal2(ethnicity),
group = "White",
popup = ~paste("", ID, "", "
", date, "
", age, "
", sex, "
", weapon, "
", status)) %>%
addCircleMarkers(data = asian,
lng = ~longitude,
lat = ~latitude,
radius = 2,
color = ~pal2(ethnicity),
group = "Asian",
popup = ~paste("", ID, "", "
", date, "
", age, "
", sex, "
", weapon, "
", status)) %>%
addCircleMarkers(data = other,
lng = ~longitude,
lat = ~latitude,
radius = 2,
color = ~pal2(ethnicity),
group = "Other",
popup = ~paste("", ID, "", "
", date, "
", age, "
", sex, "
", weapon, "
", status)) %>%
addLegend(position = "bottomright",
pal = pal2,
values = c("Black", "White", "Asian", "Other"),
title = "Legend") %>%
addLayersControl(
overlayGroups = c("Black",
"White",
"Asian",
"Other"))
```
```
```
Column
-----------------------------------------------------------------------
### Gun victims are younger on average than victims of other weapons (2008-2020)
```{r}
p19 <- ggplot (data = mmap8, aes(x = weapon, y = age))+ geom_boxplot() + geom_jitter(alpha = 2/5) +
ylab("age of victim") +
stat_summary(fun.y="mean", colour="red", geom="point",shape = 16, size = 4) +
theme_minimal()
ggplotly(p19)
```
### Homicides involving guns are less likely to get solved (2008-2020)
```{r}
p22 <- ggplot(mmap13, aes(x= reorder(weapon, percent), y=percent))+
geom_bar(stat="identity", fill="navyblue", color = "black")+
geom_label(size = 4, aes(label = percent, hjust = 0.2))+
coord_flip()+
labs(x = "Weapon", y = "Percent Solved")+
theme_bw()
ggplotly(p22)
```