Load libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(tmap)
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
##      (status 2 uses the sf package in place of rgdal)
library(leaflet)
library(here)
## here() starts at /home/rstudio/Mod 2/Major 1
library(tidycensus)
library(sf)
library(dplyr)
library(lubridate)
library(skimr)
library(maps)
## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map
library(ggplot2)

Read in the Fatal Encounters file

# Load the Fatal Encounters spreadsheet export into a data frame.
# The file appears to be Mac Roman encoded: without a declared encoding,
# accented names arrive as raw bytes (e.g. 'Le<97>n Guti<8e>rrez', where 0x97
# and 0x8e are the Mac Roman codes for o-acute and e-acute) and string
# functions downstream warn "unable to translate ... to a wide string".
# Declaring the encoding converts the text to the native encoding on read.
fe <- read.csv(
  "~/Mod 2/Major 1/FATAL ENCOUNTERS DOT ORG SPREADSHEET.csv",
  fileEncoding = "macintosh"
)

Skim the data for a per-column summary

# Print a per-column overview of `fe` (types, missingness, basic statistics).
skimr::skim(fe)
## Warning: There was 1 warning in `dplyr::summarize()`.
## ℹ In argument: `dplyr::across(tidyselect::any_of(variable_names),
##   mangled_skimmers$funs)`.
## ℹ In group 0: .
## Caused by warning:
## ! There were 141 warnings in `dplyr::summarize()`.
## The first warning was:
## ℹ In argument: `dplyr::across(tidyselect::any_of(variable_names),
##   mangled_skimmers$funs)`.
## Caused by warning in `grepl()`:
## ! unable to translate 'Juan de Le<97>n Guti<8e>rrez' to a wide string
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 140 remaining warnings.
Data summary
Name fe
Number of rows 31498
Number of columns 36
_______________________
Column type frequency:
character 28
logical 1
numeric 7
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Name 0 1.00 4 82 0 29859 0
Age 0 1.00 0 5 1221 160 0
Gender 0 1.00 0 11 144 4 0
Race 0 1.00 0 57 1 12 0
Race.with.imputations 862 0.97 0 23 6 10 0
Imputation.probability 881 0.97 0 19 3 6614 0
URL.of.image..PLS.NO.HOTLINKS. 0 1.00 0 373 16773 14668 0
Date.of.injury.resulting.in.death..month.day.year. 0 1.00 10 10 0 7736 0
Location.of.injury..address. 0 1.00 0 74 556 28893 0
Location.of.death..city. 0 1.00 0 30 36 6340 0
State 0 1.00 0 2 1 52 0
Location.of.death..county. 0 1.00 0 33 15 1536 0
Full.Address 0 1.00 0 103 1 29709 0
Latitude 0 1.00 0 17 1 29515 0
Agency.or.agencies.involved 0 1.00 0 266 78 6829 0
Highest.level.of.force 0 1.00 0 33 4 19 0
Name.Temporary 0 1.00 0 58 25969 5284 0
Armed.Unarmed 0 1.00 0 19 14419 10 0
Alleged.weapon 0 1.00 0 35 14421 269 0
Aggressive.physical.movement 0 1.00 0 42 14418 32 0
Fleeing.Not.fleeing 0 1.00 0 42 14419 26 0
Description.Temp 0 1.00 0 2239 27431 3870 0
URL.Temp 0 1.00 0 723 28281 3066 0
Brief.description 0 1.00 0 2239 2 29883 0
Dispositions.Exclusions.INTERNAL.USE..NOT.FOR.ANALYSIS 0 1.00 0 89 3 156 0
Intended.use.of.force..Developing. 0 1.00 0 22 3 9 0
Supporting.document.link 0 1.00 0 438 2 29269 0
Foreknowledge.of.mental.illness..INTERNAL.USE..NOT.FOR.ANALYSIS 0 1.00 0 19 62 5 0

Variable type: logical

skim_variable n_missing complete_rate mean count
X 31498 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Unique.ID 1 1.00 15749.00 9092.55 1.00 7875 15749.00 23623.00 31497.00 ▇▇▇▇▇
Location.of.death..zip.code. 182 0.99 58352.53 27966.03 1013.00 33147 60649.00 85033.00 99921.00 ▃▇▃▆▇
Longitude 1 1.00 -95.40 16.30 -165.59 -111 -90.56 -82.57 -67.27 ▁▁▅▇▇
UID.Temporary 25969 0.18 15464.08 6559.72 9759.00 11156 12549.00 19240.00 30340.00 ▇▁▁▁▂
X.1 31497 0.00 10895.00 NA 10895.00 10895 10895.00 10895.00 10895.00 ▁▁▇▁▁
Unique.ID.formula 31496 0.00 29497.00 2828.43 27497.00 28497 29497.00 30497.00 31497.00 ▇▁▁▁▇
Unique.identifier..redundant. 1 1.00 15749.00 9092.55 1.00 7875 15749.00 23623.00 31497.00 ▇▇▇▇▇

Remove rows with missing latitude or longitude

# Keep only records that have both coordinates.
# `Latitude` is read in as a character column, so a missing latitude shows up
# as an empty string rather than NA and `is.na()` alone never removes it;
# blank / whitespace-only strings are therefore treated as missing too.
# `Longitude` is numeric, so the plain NA check is sufficient there.
fe <- fe %>%
  filter(
    !is.na(Latitude),
    nzchar(trimws(Latitude)),
    !is.na(Longitude)
  )

Group and count cases by state

# Number of records per value of `State` (one output row per state code).
fe_state <- summarise(
  group_by(fe, State),
  Count = n()
)

Map case counts by state

# Read the state boundary polygons from the shapefile in the working directory.
us_states <- st_read(dsn = "tl_2012_us_state.shp")
## Reading layer `tl_2012_us_state' from data source 
##   `/home/rstudio/Mod 2/Major 1/tl_2012_us_state.shp' using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -19951910 ymin: -1643352 xmax: 20021890 ymax: 11554790
## Projected CRS: Popular Visualisation CRS / Mercator
# Attach the per-state counts to the state polygons.
# The counts are keyed by `State`; copy it into `STUSPS`, the column the two
# tables are matched on (presumably the shapefile's postal-abbreviation field;
# confirm against the shapefile attributes).
fe_state$STUSPS <- fe_state$State

# Name the join key explicitly: an implicit natural join would silently start
# matching on any other column name the two tables happen to share.
# Polygons with no matching count keep `Count = NA`.
fe_map <- us_states %>%
  left_join(fe_state, by = "STUSPS")
# Switch tmap to interactive viewing before drawing.
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
# Choropleth of `Count` per state polygon, with class breaks chosen by the
# "jenks" style.
tm_shape(shp = fe_map) +
  tm_polygons(
    col = "Count",
    style = "jenks"
  )

Extract years and make new column

# Parse the month/day/year strings into Date values and derive a `year` column.
date_col <- "Date.of.injury.resulting.in.death..month.day.year."

# Parse only while the column is still text: the column is overwritten in
# place, and running mdy() on already-parsed Date values would turn every
# entry into NA.
if (!inherits(fe[[date_col]], "Date")) {
  # mdy() reports unparseable values as NA (with a warning), not as an error,
  # so failures are counted and reported here instead of being caught.
  parsed_dates <- mdy(fe[[date_col]], quiet = TRUE)
  n_unparsed <- sum(is.na(parsed_dates) & !is.na(fe[[date_col]]))
  if (n_unparsed > 0) {
    warning(
      n_unparsed,
      " date value(s) could not be parsed as month/day/year and were set to NA.",
      call. = FALSE
    )
  }
  fe[[date_col]] <- parsed_dates
}

fe$year <- year(fe[[date_col]])

Count cases per year

# Number of records for each value of `year`.
fe_year <- fe %>% group_by(year) %>% summarize(Count = n())

Graph of cases over time

# Scatter plot of the yearly case counts: one point per year.
ggplot(fe_year, aes(x = year, y = Count)) +
  geom_point()

Make case count by race

# Number of records for each value of `Race`.
fe_race <- summarize(group_by(fe, Race), Count = n())

# Bar chart of those counts; stat = "identity" draws the precomputed `Count`
# values as bar heights instead of counting rows again.
ggplot(fe_race) +
  geom_bar(
    mapping = aes(x = Race, y = Count),
    stat = "identity"
  )