Load necessary packages
library(leaflet)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 2.0.0 ✔ dplyr 0.7.8
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.3.1 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(htmltools)
library(rtweet)
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:purrr':
##
## flatten
library(readxl)
library(broom)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(DT)
Read in the shooting dataset
# Download the shootings spreadsheet as CSV and parse it into a tibble.
shootings <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/1b9o6uDO18sLxBqPwl_Gh9bnhW-ev_dABH83M5Vb5L8o/export?format=csv"
)
## Warning: Duplicated column names deduplicated: 'location' =>
## 'location_1' [8]
## Parsed with column specification:
## cols(
## .default = col_character(),
## fatalities = col_double(),
## injured = col_double(),
## total_victims = col_double(),
## age_of_shooter = col_double(),
## latitude = col_double(),
## longitude = col_double(),
## year = col_double()
## )
## See spec(...) for full column specifications.
Create a map of the shootings
# Interactive map of the shootings. The view is centred on latitude 39.04,
# longitude -100 at zoom level 4, and a single pin marker is dropped on that
# same point. Each row of `shootings` is drawn as a circle whose radius is
# log(total_victims); no coordinates are passed to addCircleMarkers(), so
# leaflet picks the longitude/latitude columns itself.
leaflet(data = shootings) %>%
  addTiles() %>%
  setView(lng = -100, lat = 39.04, zoom = 4) %>%
  addMarkers(lng = -100, lat = 39.04) %>%
  addCircleMarkers(radius = ~ log(total_victims))
## Assuming "longitude" and "latitude" are longitude and latitude, respectively
Display the median number of total_victims and the median number of fatalities
# Median total victims and median fatalities across all incidents, returned
# together as a one-row tibble so both figures come from a single summary.
shootings %>%
  summarize(
    median_total_victims = median(total_victims),
    median_fatalities = median(fatalities)
  )
Create a histogram of the number of shootings per year
# Histogram of incidents by year; nbinsx = 40 is the bin-count setting passed
# to plotly for the x axis.
plot_ly(data = shootings, x = ~year) %>%
  add_histogram(nbinsx = 40)
Create a heatmap showing the age of the shooter versus their gender. It is interesting to note that there are cases of male and female shooters working together. According to this heatmap, shooters are predominantly men in their mid-to-late 20s.
# 2-D density contour of shooter age against gender. The gender column mixes
# abbreviations and full words, so "M"/"Male" and "F"/"Female" are collapsed
# into single "Male" and "Female" levels before plotting; any other values
# are left as they are.
plot_ly(
  data = mutate(
    shootings,
    gender = fct_collapse(
      gender,
      Male = c("M", "Male"),
      Female = c("F", "Female")
    )
  ),
  x = ~age_of_shooter,
  y = ~gender
) %>%
  add_histogram2dcontour()
Create a scatterplot depicting the number of people injured against the number of fatalities in US shootings.
# Scatterplot with one point per incident: injured on x, fatalities on y.
plot_ly(data = shootings, x = ~injured, y = ~fatalities) %>%
  add_markers()
Conduct a Regression Analysis
# Yearly tally (column `n`) of incidents with more than three fatalities,
# restricted to years before 2019.
num_per_year <- shootings %>%
  filter(fatalities > 3, year < 2019) %>%
  count(year)
num_per_year
Calculate the number of shootings per year.
# Number of shooting incidents recorded in each year before 2019.
# The result has one row per year with two columns holding the same tally:
#   count        - integer number of incidents (rows) in that year
#   num_per_year - the same tally as a double; this is the response variable
#                  used by the regression and the plot further down
# NOTE(review): 2019 onward is excluded, presumably because the data for those
# years is incomplete - confirm.
num_per_year <- shootings %>%
  filter(year < 2019) %>%
  group_by(year) %>%
  summarize(count = n()) %>%  # one row per year; grouping is dropped here
  mutate(num_per_year = as.numeric(count))
num_per_year
Create a regression model
# Fit a linear model of the yearly shooting tally (`num_per_year` column) on
# `year`, using the `num_per_year` data frame built above. The fitted object is
# reused below to draw the trend line.
num_per_year_model <- lm(num_per_year ~ year, data = num_per_year)
Display the regression model within the scatterplot
# Scatterplot of shootings per year with the fitted regression line on top.
# hoverinfo must be the string "text" so that plotly shows only the custom
# tooltip built in `text`; the bare symbol `text` would resolve to the
# graphics::text() function instead of a hover mode.
# NOTE(review): fitted() is assumed to line up row-for-row with num_per_year,
# i.e. lm() dropped no rows for missing values - confirm.
num_per_year %>%
  plot_ly(
    x = ~year,
    y = ~num_per_year,
    hoverinfo = "text",
    text = ~paste(
      "Shootings per year: ", num_per_year, "<br>",
      "Year: ", year
    )
  ) %>%
  add_markers() %>%
  add_lines(y = ~fitted(num_per_year_model)) %>%
  layout(
    title = "Number of Shootings Per Year",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Number of Shootings")
  )
According to this regression model, the number of shootings has increased since the early 1980s and may continue to rise over the next few years.