suppressPackageStartupMessages(
{
package_list <- c('plotly', 'maps', 'foreach','RColorBrewer', 'dplyr', 'stringr', 'sqldf', 'magrittr','webshot')
non_installed <- package_list[!(package_list %in% installed.packages()[,"Package"])]
if(length(non_installed)) install.packages(non_installed)
library('plotly')
library('maps')
library('foreach')
library('RColorBrewer')
library('dplyr')
library('stringr')
library('sqldf')
library('magrittr')
library('webshot')
}
)
load required R packages
raw_data <- read.csv("police_shootings.csv", stringsAsFactors = F)
raw_data <- raw_data[complete.cases(raw_data),]
#Data Cleaning
colnames(raw_data) <- c("ID", "Name", "Date", "Manner_of_Death",
"Armed", "Age", "Gender", "Race",
"City", "State", "Mental_Illness",
"Attack", "Fleeing", "Body_Cam")
raw_data$Race[raw_data$Race == ""] <- "UNK"
raw_data_clean <- data.frame(raw_data)
raw_data_clean$Date <- as.Date(raw_data$Date)
raw_data_clean$Mental_Illness <- ifelse(raw_data$Mental_Illness == "True", T, F)
raw_data_clean$Attack <- ifelse(raw_data$Attack == "attack", T, F)
raw_data_clean$Armed <- as.logical(ifelse(raw_data$Armed %in% c("unarmed","undetermined"), F, T))
raw_data_clean$Fleeing <- as.factor(ifelse(raw_data$Fleeing == "", "Not fleeing", raw_data$Fleeing))
raw_data_nonViolent <- sqldf("SELECT *
FROM raw_data_clean
WHERE Attack
ORDER BY ID DESC
")
raw_data_plotly <- plot_ly(raw_data_clean) %>%
add_markers(x = ~Date, y = ~Age, color = ~Race,
text = ~paste("</br>Name: ", Name,
"</br>Race: ", Race,
"</br>Gender: ", Gender)) %>%
layout(title = "Date and Age of Victims Killed By Policing in Nonviolent Encounters")
suppressWarnings({raw_data_plotly})
Raw Set of Police Shooting Data
cityShootings <- sqldf("Select COUNT(City) As Shootings, LOWER(City) As City,
State, Race
From raw_data_clean
Group By City
Order By State")
#clean up city names for data join on city and state
cityLoc <- us.cities
cityLoc$name <- foreach(x = 1:length(cityLoc$name), .combine = c) %do%
tolower(rawToChar(charToRaw(cityLoc$name[x])[1:(length(charToRaw(cityLoc$name[x]))-3)]))
colnames(cityLoc) <- c("City", "State", "Population", "Latitude", "Longitude", "Capital")
cityShootingLoc <- sqldf("Select cityShootings.*, Population/cityShootings.Shootings As KillingPer,
Population, Latitude, Longitude
From cityShootings Inner Join cityLoc
On cityLoc.City == cityShootings.City And cityLoc.State == cityShootings.State
Order By cityShootings.State, cityShootings.City")
cityShooting_geo_plotly <- ggplot2::map_data("state") %>%
plot_ly(x = ~long, y = ~lat)%>%
group_by(group) %>%
add_polygons(name = "USA", hoverinfo = "none") %>%
add_markers(name = "Shootings", data = cityShootingLoc[ifelse(
cityShootingLoc$State %in% c('HI','AK'), FALSE, TRUE),],
x = ~Longitude, y = ~Latitude,
color = ~KillingPer, colors = "Spectral",
size = ~Shootings,
text = ~paste("</br>City: ", City,
"</br>State: ", State,
"</br>Population: ", Population,
"</br>Shootings: ", Shootings),
hoverinfo = text) %>%
layout(title = "Police Shootings in USA 2015 - 2017",
xaxis = list(title = "Longitude"),
yaxis = list(title = "Latitude"))
cityShooting_geo_plotly
#cityShooting_geo_gg
ByRaceCount <- sqldf("SELECT race As Race, COUNT( race ) As Number
FROM raw_data_clean
GROUP BY race")
#data on race in the US: https://www.census.gov/quickfacts/fact/table/US/PST045216
ByRaceCount$RacePCT_2016 <- c( "A" = 0.057,
"B" = 0.133,
"H" = 0.178,
"N" = 0.013,
"O" = 0.002,
"UNK" = 0.004,
"W" = 0.613)
ByRaceCount$Expected_Number <- ceiling(sum(ByRaceCount$Number) * ByRaceCount$RacePCT_2016)
ByRaceCount_plotly <- plot_ly() %>%
add_pie(data = ByRaceCount[c("Race", "Number")],
name = "Actual Result", labels =~Race, values = ~Number,
domain = list(x = c(0, 0.4), y = c(0.4, 0.8))) %>%
add_pie(data = ByRaceCount[c("Race", "Expected_Number")],
name = "Expected Result", labels = ~Race, values = ~Expected_Number,
domain = list(x = c(0.6, 1), y = c(0.4, 0.8))) %>%
layout(title = "Police Shootings By Race 2015-2017 Actual vs Expected
(Using 2016 Population Percentages)",
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
ByRaceCount_plotly