Load necessary packages

library(leaflet)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  2.0.0     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ readr   1.3.1     ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(htmltools)
library(rtweet)
## 
## Attaching package: 'rtweet'
## The following object is masked from 'package:purrr':
## 
##     flatten
library(readxl)
library(broom)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(DT)

Read in the shooting dataset

# Pull the shootings spreadsheet straight from its Google Sheets CSV export.
shootings <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/1b9o6uDO18sLxBqPwl_Gh9bnhW-ev_dABH83M5Vb5L8o/export?format=csv"
)
## Warning: Duplicated column names deduplicated: 'location' =>
## 'location_1' [8]
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   fatalities = col_double(),
##   injured = col_double(),
##   total_victims = col_double(),
##   age_of_shooter = col_double(),
##   latitude = col_double(),
##   longitude = col_double(),
##   year = col_double()
## )
## See spec(...) for full column specifications.

Create a map of the shootings

# Interactive map: one circle per incident, sized by the log of total victims.
# Circle coordinates are left for leaflet to infer from the data's columns.
leaflet(shootings) %>%
  addTiles() %>%
  # Centre the initial view (and drop a reference pin) at lng -100, lat 39.04.
  setView(lng = -100, lat = 39.04, zoom = 4) %>%
  addMarkers(lng = -100, lat = 39.04) %>%
  addCircleMarkers(radius = ~log(total_victims))
## Assuming "longitude" and "latitude" are longitude and latitude, respectively

Display the median number of total victims and the median number of fatalities

# Median of total_victims over all rows, printed as a one-row tibble.
summarize(shootings, median_total_victims = median(total_victims))
# Median of fatalities over all rows (output column name kept as originally spelled).
summarize(shootings, median_fatalaties = median(fatalities))

Create a histogram of the number of shootings per year

# Histogram of incidents by year, with up to 40 bins on the x axis.
plot_ly(shootings, x = ~year) %>%
  add_histogram(nbinsx = 40)

Create a heatmap showing the age of the shooter versus their gender. It is interesting to note that there are cases of both male and female shooters working together. According to this heatmap, shooters are predominantly men in their mid to late 20s.

# 2D contour histogram of shooter age against gender.
plot_ly(
  # Merge the abbreviated and spelled-out gender labels into single levels.
  data = mutate(
    shootings,
    gender = fct_collapse(
      gender,
      Male = c("M", "Male"),
      Female = c("F", "Female")
    )
  ),
  x = ~age_of_shooter,
  y = ~gender
) %>%
  add_histogram2dcontour()

Create a scatterplot depicting the number of people injured versus the number of fatalities in US shootings.

# Scatterplot: injured count on the x axis, fatalities on the y axis.
plot_ly(shootings, x = ~injured, y = ~fatalities) %>%
  add_markers()

Conduct a Regression Analysis

# Tally, per year, the incidents with more than three fatalities,
# keeping only years before 2019.
num_per_year <- count(
  filter(shootings, fatalities > 3, year < 2019),
  year
)
num_per_year

Calculate the number of shootings per year.

# Count the shooting incidents recorded in each year before 2019.
#
# The result has one row per year with two columns holding the same value:
#   count        - number of incidents (rows) in that year
#   num_per_year - the same count, under the name used as the response
#                  variable by the code that consumes this table
#
# The count is taken directly with n(); the earlier sum(year) / year form
# produced the same numbers but obscured the intent.
num_per_year <- shootings %>%
  filter(year < 2019) %>%            # drop 2019 onward (and rows with no year)
  group_by(year) %>%                 # one group per calendar year
  summarize(count = n()) %>%         # incidents per year
  mutate(num_per_year = count)       # alias used by the model and plot

num_per_year

Create a regression model

# Ordinary least-squares fit of the num_per_year column on year.
num_per_year_model <- lm(
  formula = num_per_year ~ year,
  data = num_per_year
)

Display the regression model within the scatterplot

# Scatterplot of the num_per_year column against year, overlaid with the
# fitted values of the linear model.
num_per_year %>%
  plot_ly(
    x = ~year,
    y = ~num_per_year,
    # hoverinfo must be the string "text" so that only the custom label below
    # is shown; the bare symbol `text` would resolve to graphics::text().
    hoverinfo = "text",
    text = ~paste("Shootings per year: ", num_per_year, "<br>", "Year: ", year)
  ) %>%
  add_markers() %>%
  # Fitted values are in the same row order as the data the model was fit on.
  add_lines(y = ~fitted(num_per_year_model)) %>%
  layout(
    title = "Number of Shootings Per Year",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Number of Shootings")
  )

According to this regression model, the number of shootings has increased since the early 1980s and may continue to rise over the next few years.