library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.1
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(readr)
library(dplyr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Read the cleaned fatal police shootings data.
# The directory is joined onto the file name and handed straight to
# read_csv(), so the session's working directory is not changed as a side
# effect of loading the data.
data_dir <- "/Users/tiffanyking/Desktop/Data 110"
fatalshootings <- read_csv(
  file.path(data_dir, "fatal_police_shootings_clean.csv")
)
## Parsed with column specification:
## cols(
## id = col_double(),
## name = col_character(),
## date = col_character(),
## manner_of_death = col_character(),
## armed = col_character(),
## age = col_double(),
## gender = col_character(),
## race = col_character(),
## city = col_character(),
## state = col_character(),
## signs_of_mental_illness = col_logical(),
## threat_level = col_character(),
## flee = col_character(),
## body_camera = col_logical(),
## weapon = col_character()
## )
# Print the structure of the loaded table: column names, types, first values.
str(fatalshootings)
## tibble [5,148 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ id : num [1:5148] 3 4 5 8 9 11 13 15 16 17 ...
## $ name : chr [1:5148] "Tim Elliot" "Lewis Lee Lembke" "John Paul Quintero" "Matthew Hoffman" ...
## $ date : chr [1:5148] "1/2/2015" "1/2/2015" "1/3/2015" "1/4/2015" ...
## $ manner_of_death : chr [1:5148] "shot" "shot" "shot and Tasered" "shot" ...
## $ armed : chr [1:5148] "gun" "gun" "unarmed" "toy weapon" ...
## $ age : num [1:5148] 53 47 23 32 39 18 22 35 34 47 ...
## $ gender : chr [1:5148] "M" "M" "M" "M" ...
## $ race : chr [1:5148] "Asian" "White non-hispanic" "Hispanic" "White non-hispanic" ...
## $ city : chr [1:5148] "Shelton" "Aloha" "Wichita" "San Francisco" ...
## $ state : chr [1:5148] "WA" "OR" "KS" "CA" ...
## $ signs_of_mental_illness: logi [1:5148] TRUE FALSE FALSE TRUE FALSE FALSE ...
## $ threat_level : chr [1:5148] "attack" "attack" "other" "attack" ...
## $ flee : chr [1:5148] "Not fleeing" "Not fleeing" "Not fleeing" "Not fleeing" ...
## $ body_camera : logi [1:5148] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ weapon : chr [1:5148] "armed" "armed" "unarmed" "armed" ...
## - attr(*, "spec")=
## .. cols(
## .. id = col_double(),
## .. name = col_character(),
## .. date = col_character(),
## .. manner_of_death = col_character(),
## .. armed = col_character(),
## .. age = col_double(),
## .. gender = col_character(),
## .. race = col_character(),
## .. city = col_character(),
## .. state = col_character(),
## .. signs_of_mental_illness = col_logical(),
## .. threat_level = col_character(),
## .. flee = col_character(),
## .. body_camera = col_logical(),
## .. weapon = col_character()
## .. )
# Print per-column summaries (including the NA count for age).
summary(fatalshootings)
## id name date manner_of_death
## Min. : 3 Length:5148 Length:5148 Length:5148
## 1st Qu.:1565 Class :character Class :character Class :character
## Median :3072 Mode :character Mode :character Mode :character
## Mean :3039
## 3rd Qu.:4534
## Max. :5895
##
## armed age gender race
## Length:5148 Min. : 6.00 Length:5148 Length:5148
## Class :character 1st Qu.:27.00 Class :character Class :character
## Mode :character Median :35.00 Mode :character Mode :character
## Mean :37.31
## 3rd Qu.:46.00
## Max. :91.00
## NA's :233
## city state signs_of_mental_illness
## Length:5148 Length:5148 Mode :logical
## Class :character Class :character FALSE:3957
## Mode :character Mode :character TRUE :1191
##
##
##
##
## threat_level flee body_camera weapon
## Length:5148 Length:5148 Mode :logical Length:5148
## Class :character Class :character FALSE:4558 Class :character
## Mode :character Mode :character TRUE :590 Mode :character
##
##
##
##
# Keep only the rows that have no missing value in any column.
Fatal <- na.omit(fatalshootings)

# Mean age per race, printed from the highest average to the lowest.
Fatal %>%
  group_by(race) %>%
  summarize(total = mean(age)) %>%
  arrange(desc(total))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 6 x 2
## race total
## <chr> <dbl>
## 1 White non-hispanic 40.0
## 2 Asian 36.7
## 3 Hispanic 33.6
## 4 Other 33.2
## 5 Black non-hispanic 32.5
## 6 Native American 32.1
# Average age per race, computed from the cleaned data instead of being
# copied by hand from printed output, so the values stay in sync with Fatal.
# Columns keep the names the bar chart expects: `Color` (race) and `Ave.`.
my_df <- Fatal %>%
  group_by(race) %>%
  summarize(Ave. = mean(age), .groups = "drop") %>%
  rename(Color = race) %>%
  as.data.frame()
library(RColorBrewer)
# Horizontal bar chart of the average age per race stored in my_df.
Plot1 <- my_df %>%
  ggplot(aes(x = Color, y = Ave., fill = Color)) +
  # geom_col() draws bars whose lengths are the supplied y values.
  geom_col() +
  coord_flip() +
  theme_minimal() +
  ggtitle("Average Age M/F will be fatally shot by police through 2015-20") +
  theme(
    plot.title = element_text(hjust = .01, size = 15),
    legend.justification = -20,
    legend.position = "bottom",
    legend.text = element_text(size = 6)
  ) +
  xlab("Race") +
  ylab("Average Age") +
  # The bars are coloured through the `fill` aesthetic, so the Brewer palette
  # must be applied to the fill scale; a colour scale has nothing to act on.
  scale_fill_brewer()
Plot1
## Another plot
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Convert the month/day/year text in `date` to Date values, then print.
Fatal_shot <- mutate(Fatal, date = as.Date(date, format = "%m/%d/%Y"))
Fatal_shot
## # A tibble: 4,317 x 15
## id name date manner_of_death armed age gender race city state
## <dbl> <chr> <date> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 3 Tim … 2015-01-02 shot gun 53 M Asian Shel… WA
## 2 4 Lewi… 2015-01-02 shot gun 47 M Whit… Aloha OR
## 3 5 John… 2015-01-03 shot and Taser… unar… 23 M Hisp… Wich… KS
## 4 8 Matt… 2015-01-04 shot toy … 32 M Whit… San … CA
## 5 9 Mich… 2015-01-04 shot nail… 39 M Hisp… Evans CO
## 6 11 Kenn… 2015-01-04 shot gun 18 M Whit… Guth… OK
## 7 13 Kenn… 2015-01-05 shot gun 22 M Hisp… Chan… AZ
## 8 15 Broc… 2015-01-06 shot gun 35 M Whit… Assa… KS
## 9 16 Autu… 2015-01-06 shot unar… 34 F Whit… Burl… IA
## 10 17 Lesl… 2015-01-06 shot toy … 47 M Blac… Knox… PA
## # … with 4,307 more rows, and 5 more variables: signs_of_mental_illness <lgl>,
## # threat_level <chr>, flee <chr>, body_camera <lgl>, weapon <chr>
##Plot 2
# Scatter plot of age against date; race sets both point shape and colour.
plot2 <- ggplot(
  Fatal_shot,
  aes(x = date, y = age, shape = factor(race), colour = factor(race))
) +
  geom_point(size = 1) +
  labs(title = "Ages of Fatal Shooting by Police through 2015-20")
plot2
##Plot 4
# Stack columns 3 through 10 of Fatal_shot into a key column named "race"
# and a value column named "age".
# NOTE(review): columns 3:10 (date ... state) include the original `race` and
# `age`, and Plot4 is not referenced by the visible plotting code -- confirm
# this reshape is still wanted.
Plot4 <- tidyr::gather(Fatal_shot, key = "race", value = "age", 3:10)
## Warning: attributes are not identical across measure variables;
## they will be dropped
# Age against date, coloured by race, with one facet panel per race.
Fatalcrimesplot <- ggplot(
  Fatal_shot,
  aes(x = date, y = age, color = race)
) +
  geom_point() +
  facet_wrap(~race)
Fatalcrimesplot
# Certain races had more data than others, and it seems a little messy.
# Average age per race and calendar year, laid out as
# (category, time-variable, value) for alluvial_ts().
Plot3 <- Fatal_shot %>%
  select(race, date, age) %>%
  # %in% tests each row for membership in the set; `==` against a length-6
  # vector recycles it and keeps only rows that happen to line up.
  filter(race %in% c(
    "White non-hispanic", "Black non-hispanic", "Asian",
    "Native American", "Hispanic", "Other"
  )) %>%
  # alluvial_ts() reports that the time variable must be numeric, factor, or
  # ordered, so the Date column is reduced to its numeric year.
  mutate(year = lubridate::year(date)) %>%
  group_by(race, year) %>%
  summarize(Average_age = mean(age), .groups = "drop") %>%
  select(race, year, Average_age) %>%
  as.data.frame()
## Warning in race == c("White non-hispanic", "Black non-hispanic", "Asian", :
## longer object length is not a multiple of shorter object length
## `summarise()` regrouping output by 'date' (override with `.groups` argument)
library(alluvial)
# Draw the alluvial time-series chart from Plot3.
alluvial_ts(
  Plot3,
  wave = .3,
  ygap = 5,
  grid = TRUE,
  xlab = "Year",
  ylab = "Average Age",
  border = NA,
  axis.cex = .8,
  leg.mode = FALSE,
  leg.max = 250000,
  leg.y = .96,
  leg.cex = .7,
  title = "Average Age M/F will be fatally shot by police through 2015-20"
)
## [1] "Error: time variable must be numeric, factor, or ordered"
The source and topic of the data, any variables included, what kind of variables they are, how you cleaned the dataset up (be detailed and specific, using proper terminology where appropriate).
What the visualization represents, any interesting patterns or surprises that arise within the visualization, and anything that you might have shown that you could not get to work or that you wished you could have included. Incorporate anything discussed during the town hall event.
For this project, I chose the fatal police shootings dataset, which I found on Blackboard. I remembered this dataset because we had used it in Tableau and plotted the data on a map. After reviewing the Fatal Shootings Dataset, I thought it would be a good dataset to explore. The data was fairly clean from the beginning; however, it had a few NAs, so I used na.omit() to remove any rows with NA/missing data. The variables I chose to focus on were race, age, and year/date. I wanted to see the average age at which people have been shot by police over the years. I created various plots, including bar plots and scatter plots; I chose bar plots because they are best for showing categorical data. The numeric column was age. I had difficulty plotting specific years and focusing on different variables such as gender and signs of mental illness. Throughout this process, I wanted to create alluvial and other line plots; however, I had trouble manipulating the date/year, and I kept running into errors. As a follow-up to these results, I would like to see whether there is an increase or decline in police shootings during certain months or times of the year (i.e., are there more deaths in the summer versus the winter?). I also wish that I had explored the number of people who showed signs of mental illness by race. Mental illness is a severe issue, and I think it would be interesting to explore.