library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.4
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(dplyr)
# Report the working directory for the record.
getwd()
## [1] "C:/Users/Maggie/Desktop"
# Load the raw data. The former setwd("C:/Users/Maggie/Documents") call was
# removed: it hard-coded a directory that exists on one machine only, and the
# read does not depend on the working directory because its path starts
# with "~".
the_counted <- read_csv("~/the-counted-2016 (1).csv")
## Parsed with column specification:
## cols(
## uid = col_double(),
## name = col_character(),
## age = col_character(),
## gender = col_character(),
## raceethnicity = col_character(),
## month = col_character(),
## day = col_double(),
## year = col_double(),
## streetaddress = col_character(),
## city = col_character(),
## state = col_character(),
## classification = col_character(),
## lawenforcementagency = col_character(),
## armed = col_character()
## )
# Open the raw table in the data viewer for a visual check.
view(the_counted)
# Print the compact structure: column names, types and leading values.
str(the_counted)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 1093 obs. of 14 variables:
## $ uid : num 2016406 201681 20161 20163 201653 ...
## $ name : chr "Terry Frost" "Jeffry Graves" "Joshua Sisson" "Sean O'Brien" ...
## $ age : chr "20" "62" "30" "37" ...
## $ gender : chr "Male" "Male" "Male" "Male" ...
## $ raceethnicity : chr "Black" "White" "White" "White" ...
## $ month : chr "May" "January" "January" "January" ...
## $ day : num 25 31 1 2 2 2 3 4 4 5 ...
## $ year : num 2016 2016 2016 2016 2016 ...
## $ streetaddress : chr "2692 Madison Rd" "36500 Bridgepointe Dr" "4200 6th Ave" "100 Washington St" ...
## $ city : chr "Cincinnati" "Newark" "San Diego" "Livingston" ...
## $ state : chr "OH" "CA" "CA" "MT" ...
## $ classification : chr "Gunshot" "Gunshot" "Gunshot" "Gunshot" ...
## $ lawenforcementagency: chr "Cincinnati Police Department" "Newark Police Department" "San Diego Police Department" "Livingston Police Department" ...
## $ armed : chr "Firearm" "Firearm" "Knife" "Knife" ...
## - attr(*, "spec")=
## .. cols(
## .. uid = col_double(),
## .. name = col_character(),
## .. age = col_character(),
## .. gender = col_character(),
## .. raceethnicity = col_character(),
## .. month = col_character(),
## .. day = col_double(),
## .. year = col_double(),
## .. streetaddress = col_character(),
## .. city = col_character(),
## .. state = col_character(),
## .. classification = col_character(),
## .. lawenforcementagency = col_character(),
## .. armed = col_character()
## .. )
# Missing-value audit of the raw table: first the NA count per column ...
the_counted %>%
  is.na() %>%
  colSums()
## uid name age
## 0 0 0
## gender raceethnicity month
## 0 0 0
## day year streetaddress
## 0 0 26
## city state classification
## 0 0 0
## lawenforcementagency armed
## 0 0
# ... then the grand total over the whole table.
the_counted %>%
  is.na() %>%
  sum()
## [1] 26
# Build the cleaned table: remove every row that contains an NA, then drop
# unused factor levels.
# NOTE(review): the column specification printed by read_csv() lists only
# character and double columns, so droplevels() looks like a no-op -- confirm.
# NOTE(review): all 26 NAs are in streetaddress, which the plots below never
# use, so 26 records are discarded only for a missing address -- confirm that
# this is intended.
the_counted2 <- the_counted %>%
  na.omit() %>%
  droplevels()
# Check that no NA is left anywhere in the cleaned table.
the_counted2 %>%
  is.na() %>%
  sum()
## [1] 0
view(the_counted2)
# Row positions that still have a missing street address (expected: none).
the_counted2$streetaddress %>%
  is.na() %>%
  which()
## integer(0)
# Per-column NA counts after cleaning.
the_counted2 %>%
  is.na() %>%
  colSums()
## uid name age
## 0 0 0
## gender raceethnicity month
## 0 0 0
## day year streetaddress
## 0 0 0
## city state classification
## 0 0 0
## lawenforcementagency armed
## 0 0
# Number of rows that survived the cleaning step.
the_counted2 %>%
  nrow()
## [1] 1067
# List the distinct race/ethnicity categories, in order of first appearance.
# NOTE(review): this reads the raw `the_counted` table, while the rest of the
# analysis uses the cleaned `the_counted2` -- confirm which one is intended.
the_counted$raceethnicity %>%
  unique()
## [1] "Black" "White" "Hispanic/Latino"
## [4] "Native American" "Asian/Pacific Islander" "Arab-American"
## [7] "Unknown"
# Count how many distinct categories there are.
n_distinct(the_counted$raceethnicity)
## [1] 7
# Tabulate gender once and reuse the table for both the printout and the plot
# (the same table used to be built twice).
gender_table <- table(the_counted2$gender)
gender_table
##
## Female Male
## 61 1006
# Bar chart of the gender counts.
# A stray backtick that followed this call was removed: it opened an
# unterminated quoted name, so nothing after it could be parsed.
barplot(gender_table)
# Tabulate the armed status once and reuse the table for both the printout and
# the plot (the same table used to be built twice).
armed_table <- table(the_counted2$armed)
armed_table
##
## Disputed Firearm Knife No
## 11 494 159 168
## Non-lethal firearm Other Unknown Vehicle
## 45 80 77 33
# One fill colour per category. The table has eight categories but only seven
# colours were supplied, so barplot() recycled the vector and "Vehicle" was
# drawn in the same blue as "Disputed"; "brown" is the added eighth colour.
armed_colours <- c(
  "blue", "red", "yellow", "orange",
  "green", "pink", "purple", "brown"
)
barplot(
  armed_table,
  main = "Barplot of Types of Armed",
  xlab = "Types of Armed",
  ylab = "Count",
  cex.names = 0.5, # shrink the category labels so they fit under the bars
  col = armed_colours
)
# Subset of people recorded as unarmed (armed == "No"), reduced to the two
# demographic columns compared below.
Unarmed <- select(
  filter(the_counted2, armed == "No"),
  gender, raceethnicity
)
# Gender split within the unarmed subset.
table(Unarmed[["gender"]])
##
## Female Male
## 23 145
# Stacked bar chart of the unarmed subset: one bar per race/ethnicity, filled
# by gender, with black bar outlines.
ggplot(Unarmed, aes(x = raceethnicity, fill = gender)) +
  geom_bar(colour = "black") +
  ggtitle("Stacked Barplot Comparing Unarmed By Race & Ethnicity and Gender")
# Subset of people recorded as armed with a firearm (armed == "Firearm"),
# reduced to the two demographic columns compared below.
Firearmed <- select(
  filter(the_counted2, armed == "Firearm"),
  gender, raceethnicity
)
view(Firearmed)
# Gender split within the firearm subset.
table(Firearmed[["gender"]])
##
## Female Male
## 17 477
# Race/ethnicity split within the firearm subset.
table(Firearmed[["raceethnicity"]])
##
## Arab-American Asian/Pacific Islander Black
## 1 5 142
## Hispanic/Latino Native American Unknown
## 78 10 9
## White
## 249
# Stacked bar chart of the firearm subset: one bar per race/ethnicity, filled
# by gender, with yellow bar outlines.
ggplot(Firearmed, aes(x = raceethnicity, fill = gender)) +
  geom_bar(colour = "yellow") +
  ggtitle("Stacked Barplot Comparing Firearmed By Race & Ethnicity and Gender")
# Faceted overview: one panel per armed category, each a stacked bar chart of
# race/ethnicity filled by gender. Axis titles are suppressed and the panel
# strip labels are enlarged and pushed towards the left.
Armed_FW <- ggplot(the_counted2, aes(x = raceethnicity, fill = gender)) +
  geom_bar(position = "stack") +
  facet_wrap(~armed) +
  xlab(NULL) +
  ylab(NULL) +
  ggtitle("Armed by Gender and Race & Ethnicity") +
  theme(
    strip.text = element_text(size = 12, color = "black", hjust = 0.05)
  )
print(Armed_FW)
The-Counted-2016 is a dataset of people killed by police officers in the United States in 2016. The variables that I was most interested in exploring were gender, race & ethnicity, and armed. They are all character variables. Since the dataset was not too messy, I did not have to do much cleaning. First, I used colSums(is.na()) to check whether there were any missing values. Then I used droplevels(na.omit()) to remove all the rows containing NAs. I also used the filter function to keep only the specific category of a column that I wanted to explore, and the select function to keep only the columns that I was interested in working with. For this dataset, I first looked at the counts for each category of armed. I saw that people who were unarmed made up the second-largest category, with people armed with a firearm being the largest. Then I wanted to compare the gender and the ethnicity of those who were unarmed and killed. Next, I also wanted to explore the firearm category, since it is the largest of all the categories of armed. To do so, I filtered for the firearm category and, again, selected gender and ethnicity in order to compare them. Lastly, out of curiosity, I created a facet wrap to look at every category of armed broken down by gender and ethnicity. According to all the visualizations, most of the people killed by police officers in 2016 were males (1006). Compared to the number of males killed, the number of females killed is much lower (61). It was no surprise that people armed with a firearm made up most of the deaths compared to the other types of armed. I was surprised that unarmed was the second highest. The visualizations also revealed that more unarmed females were killed than females armed with a firearm. In contrast, more males armed with a firearm were killed than unarmed males.
The visualizations also showed that the most frequently killed race & ethnicity groups across each of the eight types of armed in 2016 were Black, Hispanic/Latino, and White for males, and a mix of groups for females. One thing I wish I could have included was the variable age. The reason it was not included is that there were a couple of rows with the value “Unknown”. I used the filter function to keep only the rows whose age does not equal “Unknown”. Then I tried to change the age variable from character to numeric, but it would not work. There was a warning message, “NAs introduced by coercion,” and I was not sure how to fix it.