Civilians shot and killed by on-duty police officers in United States

From Location: https://www.kaggle.com/washingtonpost/police-shootings

suppressPackageStartupMessages(
  {
    package_list <- c('plotly', 'maps', 'foreach','RColorBrewer', 'dplyr', 'stringr', 'sqldf', 'magrittr','webshot')
    non_installed <- package_list[!(package_list %in% installed.packages()[,"Package"])]
    if(length(non_installed)) install.packages(non_installed)
    library('plotly')
    library('maps')
    library('foreach')
    library('RColorBrewer')
    library('dplyr')
    library('stringr')
    library('sqldf')
    library('magrittr')
    library('webshot')
  }
)

load required R packages

raw_data <- read.csv("police_shootings.csv", stringsAsFactors = F)
raw_data <- raw_data[complete.cases(raw_data),]

#Data Cleaning
colnames(raw_data) <- c("ID", "Name", "Date", "Manner_of_Death",
                                   "Armed", "Age", "Gender", "Race",
                                   "City", "State", "Mental_Illness",
                                   "Attack", "Fleeing", "Body_Cam")


raw_data$Race[raw_data$Race == ""] <- "UNK" 

raw_data_clean <- data.frame(raw_data)
raw_data_clean$Date <- as.Date(raw_data$Date)
raw_data_clean$Mental_Illness <- ifelse(raw_data$Mental_Illness == "True", T, F)
raw_data_clean$Attack <- ifelse(raw_data$Attack == "attack", T, F)
raw_data_clean$Armed <- as.logical(ifelse(raw_data$Armed %in% c("unarmed","undetermined"), F, T))
raw_data_clean$Fleeing <- as.factor(ifelse(raw_data$Fleeing == "", "Not fleeing", raw_data$Fleeing))


raw_data_nonViolent <- sqldf("SELECT * 
                       FROM raw_data_clean
                       WHERE Attack 
                       ORDER BY ID DESC
                       ")

raw_data_plotly <- plot_ly(raw_data_clean) %>%
                   add_markers(x = ~Date, y = ~Age, color = ~Race,
                               text = ~paste("</br>Name: ", Name,
                                             "</br>Race: ", Race,
                                             "</br>Gender: ", Gender)) %>%
                   layout(title = "Date and Age of Victims Killed By Policing in Nonviolent Encounters")

suppressWarnings({raw_data_plotly})

Raw Set of Police Shooting Data

cityShootings <- sqldf("Select COUNT(City) As Shootings, LOWER(City) As City, 
                        State, Race
                         From raw_data_clean
                         Group By City
                         Order By State")

#clean up city names for data join on city and state
cityLoc <- us.cities
cityLoc$name <- foreach(x = 1:length(cityLoc$name), .combine = c) %do%      
                        tolower(rawToChar(charToRaw(cityLoc$name[x])[1:(length(charToRaw(cityLoc$name[x]))-3)]))

colnames(cityLoc) <- c("City", "State", "Population", "Latitude", "Longitude", "Capital")

cityShootingLoc <- sqldf("Select cityShootings.*, Population/cityShootings.Shootings As KillingPer, 
                          Population, Latitude, Longitude
                          From cityShootings Inner Join cityLoc 
                          On cityLoc.City == cityShootings.City And cityLoc.State == cityShootings.State
                          Order By cityShootings.State, cityShootings.City")




cityShooting_geo_plotly <- ggplot2::map_data("state") %>%
                           plot_ly(x = ~long, y = ~lat)%>%
                           group_by(group) %>%
                           add_polygons(name = "USA", hoverinfo = "none") %>%
                           add_markers(name = "Shootings", data = cityShootingLoc[ifelse(
                             cityShootingLoc$State %in% c('HI','AK'), FALSE, TRUE),], 
                                       x = ~Longitude, y = ~Latitude, 
                                       color = ~KillingPer, colors = "Spectral",
                                       size = ~Shootings,
                                       text = ~paste("</br>City: ", City,
                                                     "</br>State: ", State,
                                                     "</br>Population: ", Population,
                                                     "</br>Shootings: ", Shootings),
                                       hoverinfo = text) %>%
                          layout(title = "Police Shootings in USA 2015 - 2017", 
                                 xaxis = list(title = "Longitude"),
                                 yaxis = list(title = "Latitude"))


cityShooting_geo_plotly
#cityShooting_geo_gg
ByRaceCount <- sqldf("SELECT race As Race, COUNT( race ) As Number
                      FROM raw_data_clean
                      GROUP BY race")

#data on race in the US: https://www.census.gov/quickfacts/fact/table/US/PST045216
ByRaceCount$RacePCT_2016 <-  c( "A" = 0.057, 
                                "B" = 0.133, 
                                "H" = 0.178, 
                                "N" = 0.013, 
                                "O" = 0.002,
                                "UNK" = 0.004,
                                "W" = 0.613)

ByRaceCount$Expected_Number <- ceiling(sum(ByRaceCount$Number) * ByRaceCount$RacePCT_2016)

ByRaceCount_plotly <- plot_ly() %>% 
                      add_pie(data = ByRaceCount[c("Race", "Number")], 
                              name = "Actual Result", labels =~Race, values = ~Number,
                              domain = list(x = c(0, 0.4), y = c(0.4, 0.8))) %>% 
                      add_pie(data = ByRaceCount[c("Race", "Expected_Number")],
                              name = "Expected Result", labels = ~Race, values = ~Expected_Number,
                              domain = list(x = c(0.6, 1), y = c(0.4, 0.8))) %>%
                      layout(title = "Police Shootings By Race 2015-2017 Actual vs Expected
                                      (Using 2016 Population Percentages)",
                             xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
                             yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))


ByRaceCount_plotly