First, import the libraries necessary for the project. Then import the dataset on police response to resistance in Dallas in 2019.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
library(dplyr)
# Read the 2019 Dallas response-to-resistance records; the relative path is
# resolved against the current working directory.
Police_Response_to_Resistance2019Dallas <- read_csv(
  file = "Police_Response_to_Resistance2019Dallas.csv"
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## OBJECTID = col_double(),
## ZIP = col_double(),
## CURRENT_BA = col_double(),
## OFF_INJURE = col_logical(),
## OFF_HOSPIT = col_logical(),
## STREET_N = col_double(),
## CitNum = col_double(),
## CIT_INJURE = col_logical(),
## CIT_ARREST = col_logical(),
## RA = col_double(),
## BEAT = col_double(),
## SECTOR = col_double(),
## X = col_double(),
## Y = col_double(),
## `Council Districts--Test` = col_double(),
## `Dallas City Limits GIS Layer` = col_double()
## )
## See spec(...) for full column specifications.
Examine the structure and first few entries of the data.
# Print each column's type and leading values for the imported table.
Police_Response_to_Resistance2019Dallas %>%
  str()
## tibble [2,944 × 41] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ OBJECTID : num [1:2944] 2817 2234 2755 2110 1663 ...
## $ ZIP : num [1:2944] 75253 75208 75231 75228 75051 ...
## $ FILENUM : chr [1:2944] "UF2019-1702" "UF2019-1344" "UF2019-1665" "UF2019-1314" ...
## $ UOFNum : chr [1:2944] "62295, 63542" "61093" "62820" "60990" ...
## $ OCCURRED_D : chr [1:2944] "12/1/2019" "10/6/2019" "12/31/2019" "9/30/2019" ...
## $ OCCURRED_T : chr [1:2944] "10:34 PM" "12:50 AM" "11:37 PM" "6:20 PM" ...
## $ CURRENT_BA : num [1:2944] 11285 11208 9415 9884 10480 ...
## $ OffSex : chr [1:2944] "Male" "Male" "Male" "Male" ...
## $ OffRace : chr [1:2944] "White" "White" "White" "Hispanic" ...
## $ HIRE_DT : chr [1:2944] "3/8/2017" "8/24/2016" "4/2/2008" "6/10/2009" ...
## $ OFF_INJURE : logi [1:2944] FALSE TRUE FALSE FALSE TRUE FALSE ...
## $ OffCondTyp : chr [1:2944] "No injuries noted or visible" "No injuries noted or visible" "No injuries noted or visible" "No injuries noted or visible" ...
## $ OFF_HOSPIT : logi [1:2944] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ SERVICE_TY : chr [1:2944] "Service Call" "Arrest" "Arrest" "Call for Cover" ...
## $ ForceType : chr [1:2944] "BD - Tripped, BD - Grabbed" "Held Suspect Down" "K-9 Deployment" "Joint Locks" ...
## $ UOF_REASON : chr [1:2944] "Detention/Frisk" "Arrest" "Arrest" "Arrest" ...
## $ Cycles_Num : chr [1:2944] "NULL" "NULL" "NULL" "NULL" ...
## $ ForceEffec : chr [1:2944] "Yes, Yes" "Yes" "Yes" "Yes" ...
## $ STREET_N : num [1:2944] 102 1500 6904 11760 1350 ...
## $ STREET : chr [1:2944] "Beltline" "Oak Cliff" "Walling" "Ferguson" ...
## $ street_g : chr [1:2944] "S" "S" "NULL" "NULL" ...
## $ street_t : chr [1:2944] "Rd." "Blvd." "Ln." "Rd." ...
## $ Address : chr [1:2944] "102 S Beltline Rd." "1500 S Oak Cliff Blvd." "6904 Walling Ln." "11760 Ferguson Rd." ...
## $ CitNum : num [1:2944] 6.08e+04 6.02e+09 6.11e+04 2.66e+04 5.95e+04 ...
## $ CitRace : chr [1:2944] "White" "Hispanic" "Black" "White" ...
## $ CitSex : chr [1:2944] "Male" "Female" "Male" "Female" ...
## $ CIT_INJURE : logi [1:2944] FALSE TRUE TRUE FALSE FALSE TRUE ...
## $ CitCondTyp : chr [1:2944] "No injuries noted or visible" "Injured prior to contact" "Bite" "No injuries noted or visible" ...
## $ CIT_ARREST : logi [1:2944] FALSE TRUE TRUE TRUE TRUE TRUE ...
## $ CIT_INFL_A : chr [1:2944] "Agitated" "Agitated" "Poor hygiene" "Unknown Drugs" ...
## $ CitChargeT : chr [1:2944] "No Arrest" "APOWW" "Burglary/Habitation, Warrant/Hold" "Assault/FV, Resisting Arrest, Warrant/Hold" ...
## $ Council District : chr [1:2944] "D8" "D1" "D9" "D9" ...
## $ RA : num [1:2944] 6062 4160 6034 1132 NA ...
## $ BEAT : num [1:2944] 357 444 247 228 NA 331 111 155 134 514 ...
## $ SECTOR : num [1:2944] 350 440 240 220 NA 330 110 150 130 510 ...
## $ DIVISION : chr [1:2944] "SOUTHEAST" "SOUTHWEST" "NORTHEAST" "NORTHEAST" ...
## $ X : num [1:2944] 2557123 2474937 2508349 2536678 2433286 ...
## $ Y : num [1:2944] 6944231 6952151 7001784 6999039 6953646 ...
## $ GeoLocation : chr [1:2944] "POINT (-96.586265 32.702825)" "POINT (-96.853036 32.729136)" "POINT (-96.741661 32.863941)" "POINT (-96.649175 32.855492)" ...
## $ Council Districts--Test : num [1:2944] 8 1 13 13 NA 5 12 12 12 2 ...
## $ Dallas City Limits GIS Layer: num [1:2944] 3 3 3 3 NA 3 3 3 3 3 ...
## - attr(*, "spec")=
## .. cols(
## .. OBJECTID = col_double(),
## .. ZIP = col_double(),
## .. FILENUM = col_character(),
## .. UOFNum = col_character(),
## .. OCCURRED_D = col_character(),
## .. OCCURRED_T = col_character(),
## .. CURRENT_BA = col_double(),
## .. OffSex = col_character(),
## .. OffRace = col_character(),
## .. HIRE_DT = col_character(),
## .. OFF_INJURE = col_logical(),
## .. OffCondTyp = col_character(),
## .. OFF_HOSPIT = col_logical(),
## .. SERVICE_TY = col_character(),
## .. ForceType = col_character(),
## .. UOF_REASON = col_character(),
## .. Cycles_Num = col_character(),
## .. ForceEffec = col_character(),
## .. STREET_N = col_double(),
## .. STREET = col_character(),
## .. street_g = col_character(),
## .. street_t = col_character(),
## .. Address = col_character(),
## .. CitNum = col_double(),
## .. CitRace = col_character(),
## .. CitSex = col_character(),
## .. CIT_INJURE = col_logical(),
## .. CitCondTyp = col_character(),
## .. CIT_ARREST = col_logical(),
## .. CIT_INFL_A = col_character(),
## .. CitChargeT = col_character(),
## .. `Council District` = col_character(),
## .. RA = col_double(),
## .. BEAT = col_double(),
## .. SECTOR = col_double(),
## .. DIVISION = col_character(),
## .. X = col_double(),
## .. Y = col_double(),
## .. GeoLocation = col_character(),
## .. `Council Districts--Test` = col_double(),
## .. `Dallas City Limits GIS Layer` = col_double()
## .. )
# Show the first rows of the imported table.
Police_Response_to_Resistance2019Dallas %>%
  head()
## # A tibble: 6 x 41
## OBJECTID ZIP FILENUM UOFNum OCCURRED_D OCCURRED_T CURRENT_BA OffSex OffRace
## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 2817 75253 UF2019… 62295… 12/1/2019 10:34 PM 11285 Male White
## 2 2234 75208 UF2019… 61093 10/6/2019 12:50 AM 11208 Male White
## 3 2755 75231 UF2019… 62820 12/31/2019 11:37 PM 9415 Male White
## 4 2110 75228 UF2019… 60990 9/30/2019 6:20 PM 9884 Male Hispan…
## 5 1663 75051 UF2019… 59592… 8/4/2019 12:10 AM 10480 Male Hispan…
## 6 2538 75217 UF2019… 62255 11/20/2019 12:30 AM 9697 Male White
## # … with 32 more variables: HIRE_DT <chr>, OFF_INJURE <lgl>, OffCondTyp <chr>,
## # OFF_HOSPIT <lgl>, SERVICE_TY <chr>, ForceType <chr>, UOF_REASON <chr>,
## # Cycles_Num <chr>, ForceEffec <chr>, STREET_N <dbl>, STREET <chr>,
## # street_g <chr>, street_t <chr>, Address <chr>, CitNum <dbl>, CitRace <chr>,
## # CitSex <chr>, CIT_INJURE <lgl>, CitCondTyp <chr>, CIT_ARREST <lgl>,
## # CIT_INFL_A <chr>, CitChargeT <chr>, `Council District` <chr>, RA <dbl>,
## # BEAT <dbl>, SECTOR <dbl>, DIVISION <chr>, X <dbl>, Y <dbl>,
## # GeoLocation <chr>, `Council Districts--Test` <dbl>, `Dallas City Limits GIS
## # Layer` <dbl>
This dataset is really rich, with 41 variables to explore. Due to time limitations, I will just focus on 8 variables, and will include an additional one (“citizen condition type”) for clarification as needed. I will use the “select” command to choose those variables.
# Keep only the officer/citizen demographic, force-type, injury, and arrest
# columns, plus the citizen condition description.
narrower_police_response <- select(
  Police_Response_to_Resistance2019Dallas,
  OffSex, OffRace, OFF_INJURE, ForceType,
  CitRace, CitSex, CIT_INJURE, CIT_ARREST,
  CitCondTyp
)
# Preview the reduced table.
narrower_police_response %>%
  head()
## # A tibble: 6 x 9
## OffSex OffRace OFF_INJURE ForceType CitRace CitSex CIT_INJURE CIT_ARREST
## <chr> <chr> <lgl> <chr> <chr> <chr> <lgl> <lgl>
## 1 Male White FALSE BD - Tri… White Male FALSE FALSE
## 2 Male White TRUE Held Sus… Hispan… Female TRUE TRUE
## 3 Male White FALSE K-9 Depl… Black Male TRUE TRUE
## 4 Male Hispan… FALSE Joint Lo… White Female FALSE TRUE
## 5 Male Hispan… TRUE Joint Lo… Black Male FALSE TRUE
## 6 Male White FALSE Joint Lo… Hispan… Male TRUE TRUE
## # … with 1 more variable: CitCondTyp <chr>
I will now check for NA’s that might cause strange results if unaddressed.
# Bring rows that contain missing values to the top, most NAs first.
# rowSums() over the is.na() matrix yields one numeric sort key per row,
# instead of handing arrange() the whole logical matrix as a sort key.
arrange(
  narrower_police_response,
  desc(rowSums(is.na(narrower_police_response)))
)
## # A tibble: 2,944 x 9
## OffSex OffRace OFF_INJURE ForceType CitRace CitSex CIT_INJURE CIT_ARREST
## <chr> <chr> <lgl> <chr> <chr> <chr> <lgl> <lgl>
## 1 Male White FALSE Weapon d… NULL <NA> FALSE FALSE
## 2 Male Hispan… FALSE Foot Pur… NULL <NA> FALSE FALSE
## 3 Male White FALSE Weapon d… NULL <NA> FALSE FALSE
## 4 Male White FALSE Foot Pur… NULL <NA> FALSE FALSE
## 5 Male White FALSE Weapon d… NULL <NA> FALSE FALSE
## 6 Male White FALSE BD - Tri… White Male FALSE FALSE
## 7 Male White TRUE Held Sus… Hispan… Female TRUE TRUE
## 8 Male White FALSE K-9 Depl… Black Male TRUE TRUE
## 9 Male Hispan… FALSE Joint Lo… White Female FALSE TRUE
## 10 Male Hispan… TRUE Joint Lo… Black Male FALSE TRUE
## # … with 2,934 more rows, and 1 more variable: CitCondTyp <chr>
There are several unknown, missing, or null cases. I want to look at those, and remove them if possible. I will do that by filtering.
# List every row whose citizen sex or race is unusable: either a genuinely
# missing value (NA) or one of the placeholder strings this check looks for.
# is.na() is needed for real NAs; comparing with the text "NA" only matches
# that literal string, and a comparison against NA makes filter() drop the row.
narrower_police_response %>%
  filter(
    is.na(CitSex) | CitSex %in% c("Unknown", "NULL", "NA") |
      is.na(CitRace) | CitRace %in% c("Unknown", "NULL", "NA")
  )
## # A tibble: 43 x 9
## OffSex OffRace OFF_INJURE ForceType CitRace CitSex CIT_INJURE CIT_ARREST
## <chr> <chr> <lgl> <chr> <chr> <chr> <lgl> <lgl>
## 1 Male Black TRUE Taser Di… NULL Male FALSE TRUE
## 2 Male Hispan… FALSE BD - Pus… NULL Female FALSE TRUE
## 3 Male White FALSE Weapon d… NULL Male FALSE TRUE
## 4 Male White FALSE Take Dow… NULL Male FALSE TRUE
## 5 Male White FALSE Weapon d… NULL NULL FALSE TRUE
## 6 Male White FALSE Take Dow… NULL Female FALSE TRUE
## 7 Male White TRUE Verbal C… NULL Male TRUE TRUE
## 8 Male White FALSE Weapon d… NULL Male FALSE TRUE
## 9 Male Hispan… FALSE Foot Pur… NULL Male FALSE TRUE
## 10 Female White TRUE Feet/Leg… NULL Male FALSE TRUE
## # … with 33 more rows, and 1 more variable: CitCondTyp <chr>
There are forty-three individuals of unknown sex and/or race. I am going to remove them by subsetting the data because 43 should not make a significant difference in the visualizations, but removing them will make the plots simpler and easier to read.
# Drop every row whose citizen sex or race is missing or holds one of the
# placeholder strings ("Unknown", "NULL", "NA").
clean_police_data <- narrower_police_response %>%
  filter(
    !is.na(CitSex),
    !is.na(CitRace),
    !(CitSex %in% c("Unknown", "NULL", "NA")),
    !(CitRace %in% c("NULL", "Unknown", "NA"))
  )
First, I want to get a quick look at what the sex of the police officers using force is and what sex the citizens against whom force is used are. I can do that most easily with simple bar graphs.
# Bar chart: number of use-of-force records by officer sex.
police_sex <- clean_police_data %>%
  ggplot(mapping = aes(x = OffSex)) +
  geom_bar(colour = "blue", fill = "blue") +
  labs(
    title = "Sex of Officers Who Used Force",
    subtitle = "Dallas, 2019",
    x = "Sex of the Police Officer",
    y = "Number of Police Officers"
  )
police_sex
# Bar chart: number of use-of-force records by citizen sex.
citizen_sex <- clean_police_data %>%
  ggplot(mapping = aes(x = CitSex)) +
  geom_bar(colour = "purple", fill = "purple") +
  labs(
    title = "Sex of Citizens Against Whom Police Used Force",
    subtitle = "Dallas, 2019",
    x = "Sex of the Citizen",
    y = "Number of Citizens"
  )
citizen_sex
I can do the same thing with bar graphs to look at the race of the police officer and the citizen.
# Bar chart: number of use-of-force records by officer race, one colour per race.
police_race <- clean_police_data %>%
  ggplot(mapping = aes(x = OffRace, fill = OffRace)) +
  geom_bar() +
  labs(
    title = "Race of Police Who Used Force",
    subtitle = "Dallas, 2019",
    x = "Officer Race",
    y = "Number of Officers"
  ) +
  guides(fill = guide_legend(title = "Race"))
police_race
# Bar chart: number of use-of-force records by citizen race, one colour per race.
citizen_race <- clean_police_data %>%
  ggplot(mapping = aes(x = CitRace, fill = CitRace)) +
  geom_bar() +
  labs(
    title = "Race of Citizens Against Whom Police Used Force",
    subtitle = "Dallas, 2019",
    x = "Citizen Race",
    y = "Number of Citizens"
  ) +
  guides(fill = guide_legend(title = "Race"))
citizen_race
The speaker at the Town Hall, who noted that as a black woman she had never had any negative interactions with police until she had sons, prompted me to want to see the relationship between race and sex and police use of force. This section of code creates a bar plot that uses sex as the fill, so we can see race and sex in one plot.
# Stacked bar chart: citizen race on the x-axis, bars split by citizen sex.
clean_police_data %>%
  ggplot(mapping = aes(x = CitRace, fill = CitSex)) +
  geom_bar() +
  labs(
    title = "Race and Sex of Citizens Against Whom Police Used Force",
    subtitle = "Dallas, 2019",
    x = "Citizen Race",
    y = "Number of Citizens"
  ) +
  guides(fill = guide_legend(title = "Sex"))
It would also be interesting to see the race of the officers and the citizens in these use of force interactions. This is possible by using the same type of bar plot, but filling with officer race as opposed to sex.
# Stacked bar chart: citizen race on the x-axis, bars split by officer race.
clean_police_data %>%
  ggplot(mapping = aes(x = CitRace, fill = OffRace)) +
  geom_bar() +
  labs(
    title = "Race of Citizens and Officers when Police Used Force",
    subtitle = "Dallas, 2019",
    x = "Citizen Race",
    y = "Number of Uses of Force"
  ) +
  guides(fill = guide_legend(title = "Officer Race"))
To see how often citizens were injured, we can build another bar plot using the “CIT_INJURE” variable, and fill with the race variable to see who is more likely to be hurt.
# Stacked bar chart: injured vs. not injured, bars split by citizen race.
# Uses the cleaned table so the race categories match the other race plots
# (records with unknown or missing citizen race/sex are excluded).
clean_police_data %>%
  ggplot(aes(x = CIT_INJURE)) +
  geom_bar(aes(fill = CitRace)) +
  labs(
    x = "Was the Citizen Injured?",
    y = "Count of Incidents",
    title = "Citizens Injured or Not by Race",
    subtitle = "Dallas, 2019"
  ) +
  guides(fill = guide_legend(title = "Citizen Race"))
If the citizen was injured, what type of injury did he or she have? Filtering the data to include only those injured reveals 143 different injuries. I viewed the data and noticed that the majority of injuries were abrasions (and that many abrasions were listed as different things, so they occurred only 1 or 2 times per entry). I subsetted the top fifteen injuries, including gunshots, and plotted them.
# Keep only the records where the citizen was injured.
# CIT_INJURE is already a logical column, so it is used directly as the filter
# condition rather than being compared with the string "TRUE".
injured_citizens <- Police_Response_to_Resistance2019Dallas %>%
  filter(CIT_INJURE)
# Tally how often each recorded condition description occurs.
injured_citizens %>%
  count(CitCondTyp)
## # A tibble: 143 x 2
## CitCondTyp n
## <chr> <int>
## 1 Abrasion/Scrape 222
## 2 Abrasion/Scrape, Bite 1
## 3 Abrasion/Scrape, Bloody Nose, Laceration/Cut 1
## 4 Abrasion/Scrape, Bloody Nose, Laceration/Cut, Taser Burn Marks, Treate… 1
## 5 Abrasion/Scrape, Bloody Nose, Redness/Swelling, Elevated Heart Rate 1
## 6 Abrasion/Scrape, Bruise 1
## 7 Abrasion/Scrape, Elevated Heart Rate 1
## 8 Abrasion/Scrape, Hospital Drug Evaluation 1
## 9 Abrasion/Scrape, Hospital Drug Evaluation, Puncture 1
## 10 Abrasion/Scrape, Hospitalized 2
## # … with 133 more rows
#view(injured_citizens %>% count(CitCondTyp))
# Restrict the injured-citizen records to the fifteen selected condition
# descriptions. The original list spelled two entries with a leading space
# (" Laceration/Cut", " No injuries noted or visible"), which cannot equal a
# value stored without one; trimws() plus clean literals matches either form.
most_common_injuries <- injured_citizens %>%
  filter(
    trimws(CitCondTyp) %in% c(
      "Abrasion/Scrape",
      "Laceration/Cut",
      "Puncture",
      "Taser Burn Marks",
      "Non-Visible Injury/Pain",
      "No injuries noted or visible",
      "Bruise",
      "Redness/Swelling",
      "Abrasion/Scrape, Non-Visible Injury/Pain",
      "Injured prior to contact",
      "Taser Burn Marks, Treated by DFD",
      "Elevated Heart Rate, Abrasion/Scrape",
      "Abrasion/Scrape, Laceration/Cut",
      "Bite",
      "Gunshot"
    )
  )
# Bar chart of the selected conditions. The fill is set as a constant outside
# aes() so the bars are actually red; inside aes() the string "red" is treated
# as a data value and the default palette colour is used instead.
most_common_injuries %>%
  ggplot(aes(x = CitCondTyp)) +
  geom_bar(fill = "red") +
  labs(
    x = "Citizen Condition",
    y = "Count of Incidents",
    title = "Most Common Injuries Citizens Sustained",
    subtitle = "Dallas, 2019"
  ) +
  # Rotate the long condition labels so they do not overlap.
  theme(axis.text.x = element_text(color = "black", size = 10, angle = 90))
By adding a fill, I can see if those injured were actually arrested.
# Keep only the records where the citizen was injured.
# CIT_INJURE is already a logical column, so it is used directly as the filter
# condition rather than being compared with the string "TRUE".
injured_citizens <- Police_Response_to_Resistance2019Dallas %>%
  filter(CIT_INJURE)
# Tally how often each recorded condition description occurs.
injured_citizens %>%
  count(CitCondTyp)
## # A tibble: 143 x 2
## CitCondTyp n
## <chr> <int>
## 1 Abrasion/Scrape 222
## 2 Abrasion/Scrape, Bite 1
## 3 Abrasion/Scrape, Bloody Nose, Laceration/Cut 1
## 4 Abrasion/Scrape, Bloody Nose, Laceration/Cut, Taser Burn Marks, Treate… 1
## 5 Abrasion/Scrape, Bloody Nose, Redness/Swelling, Elevated Heart Rate 1
## 6 Abrasion/Scrape, Bruise 1
## 7 Abrasion/Scrape, Elevated Heart Rate 1
## 8 Abrasion/Scrape, Hospital Drug Evaluation 1
## 9 Abrasion/Scrape, Hospital Drug Evaluation, Puncture 1
## 10 Abrasion/Scrape, Hospitalized 2
## # … with 133 more rows
#view(injured_citizens %>% count(CitCondTyp))
# Restrict the injured-citizen records to the fifteen selected condition
# descriptions. The original list spelled two entries with a leading space
# (" Laceration/Cut", " No injuries noted or visible"), which cannot equal a
# value stored without one; trimws() plus clean literals matches either form.
most_common_injuries <- injured_citizens %>%
  filter(
    trimws(CitCondTyp) %in% c(
      "Abrasion/Scrape",
      "Laceration/Cut",
      "Puncture",
      "Taser Burn Marks",
      "Non-Visible Injury/Pain",
      "No injuries noted or visible",
      "Bruise",
      "Redness/Swelling",
      "Abrasion/Scrape, Non-Visible Injury/Pain",
      "Injured prior to contact",
      "Taser Burn Marks, Treated by DFD",
      "Elevated Heart Rate, Abrasion/Scrape",
      "Abrasion/Scrape, Laceration/Cut",
      "Bite",
      "Gunshot"
    )
  )
# Bar chart of the selected conditions, with each bar split by whether the
# citizen was arrested.
most_common_injuries %>%
  ggplot(aes(x = CitCondTyp)) +
  geom_bar(aes(fill = CIT_ARREST), show.legend = TRUE) +
  labs(
    x = "Citizen Condition",
    y = "Count of Incidents",
    title = "Most Common Injuries Citizens Sustained",
    subtitle = "Dallas, 2019"
  ) +
  # Rotate the long condition labels so they do not overlap.
  theme(axis.text.x = element_text(color = "black", size = 10, angle = 90)) +
  guides(fill = guide_legend(title = "Citizen Arrested?"))
We can also filter to determine the race and sex of the gunshot citizens. First I will filter the data down to just those whose condition was gunshot, and then I will plot by race and sex.
# Records whose citizen condition is exactly "Gunshot".
gunshots <- most_common_injuries %>%
  filter(CitCondTyp == "Gunshot")
# Stacked bar chart: citizen race on the x-axis, bars split by citizen sex.
gunshots %>%
  ggplot(mapping = aes(x = CitRace, fill = CitSex)) +
  geom_bar() +
  labs(
    title = "Race and Sex of Citizens with Gunshots",
    subtitle = "Dallas, 2019",
    x = "Citizen Race",
    y = "Number of Incidents"
  ) +
  guides(fill = guide_legend(title = "Sex"))
With the same small dataset (of gunshot citizens), I will plot by officer race and sex.
# Stacked bar chart for the gunshot records: officer race on the x-axis,
# bars split by officer sex.
gunshots %>%
  ggplot(mapping = aes(x = OffRace, fill = OffSex)) +
  geom_bar() +
  labs(
    title = "Race and Sex of Officers who Shot Citizens",
    subtitle = "Dallas, 2019",
    x = "Officer Race",
    y = "Number of Incidents"
  ) +
  guides(fill = guide_legend(title = "Sex"))
By subsetting for taser injuries, we can do the same thing for Taser Burns.
# Records whose citizen condition is one of the two taser-burn descriptions.
tased <- most_common_injuries %>%
  filter(
    CitCondTyp == "Taser Burn Marks" |
      CitCondTyp == "Taser Burn Marks, Treated by DFD"
  )
# Stacked bar chart: citizen race on the x-axis, bars split by citizen sex.
tased %>%
  ggplot(mapping = aes(x = CitRace, fill = CitSex)) +
  geom_bar() +
  labs(
    title = "Race and Sex of Citizens with Taser Burns",
    subtitle = "Dallas, 2019",
    x = "Citizen Race",
    y = "Number of Incidents"
  ) +
  guides(fill = guide_legend(title = "Sex"))
# Stacked bar chart for the taser-burn records: officer race on the x-axis,
# bars split by officer sex, with the axis labels tilted 45 degrees.
tased %>%
  ggplot(mapping = aes(x = OffRace, fill = OffSex)) +
  geom_bar() +
  labs(
    title = "Race and Sex of Officers who Tased Citizens",
    subtitle = "Dallas, 2019",
    x = "Officer Race",
    y = "Number of Incidents"
  ) +
  theme(axis.text.x = element_text(color = "black", size = 10, angle = 45)) +
  guides(fill = guide_legend(title = "Sex"))
We could also use a quick bar plot just to see if more or less experienced officers are more involved in use of force incidents (by plotting date of hire against the count of incidents). This is such a crowded plot, that the dates are obscured. The individual dates are not as important, however, as the overall pattern.
# Number of use-of-force records by officer hire date, binned by year.
# HIRE_DT is read as text ("m/d/yyyy"); plotted directly, the bars are ordered
# alphabetically rather than chronologically, so no trend over time is visible.
# Parsing it to Date gives a true time axis. Values that do not parse become NA
# and are dropped by ggplot with a warning.
Police_Response_to_Resistance2019Dallas %>%
  mutate(hire_date = as.Date(HIRE_DT, format = "%m/%d/%Y")) %>%
  ggplot(aes(x = hire_date)) +
  # On a Date axis binwidth is measured in days: roughly one bin per year.
  geom_histogram(binwidth = 365) +
  labs(
    x = "Officer Hire Date",
    y = "Number of Uses of Force",
    title = "Entry on Duty Date for Officers Involved in Use of Force",
    subtitle = "Dallas, 2019"
  )
By adding a fill value, it is also possible to visualize the racial hiring patterns (at least among those officers who used force in 2019) over time as well.
# Number of use-of-force records by officer hire date, binned by year and
# stacked by officer race.
# HIRE_DT is read as text ("m/d/yyyy"); plotted directly, the bars are ordered
# alphabetically rather than chronologically, so hiring patterns over time
# cannot be read from the plot. Parsing it to Date gives a true time axis.
# Values that do not parse become NA and are dropped by ggplot with a warning.
Police_Response_to_Resistance2019Dallas %>%
  mutate(hire_date = as.Date(HIRE_DT, format = "%m/%d/%Y")) %>%
  ggplot(aes(x = hire_date)) +
  # On a Date axis binwidth is measured in days: roughly one bin per year.
  geom_histogram(aes(fill = OffRace), binwidth = 365) +
  labs(
    x = "Officer Hire Date",
    y = "Number of Uses of Force",
    title = "Entry on Duty Date for Officers Involved in Use of Force",
    subtitle = "Dallas, 2019"
  ) +
  guides(fill = guide_legend(title = "Officer Race"))
The “Police Response to Resistance - 2019” dataset is a set of 2944 rows and 41 columns available on the Dallas OpenData webpage (https://www.dallasopendata.com/Public-Safety/Police-Response-to-Resistance-2019/46zb-7qgj). The data was provided by the Dallas Police Department, and the dataset was last updated on June 16, 2020. The data focuses on police use of force in response to resistance. It notes what type of force was used, whether it resulted in injury, and whether it was effective.
The data include an ID, a ZIP, a filenumber, the use of force number, the date and time of the occurrence, the current BA, the officer’s sex, race, and hire date, whether or not the officer was injured, the officer’s condition and whether or not the officer was hospitalized, the service type, the force type, use of force reason, the number of cycles, whether or not the use of force was effective, several variables related to address, the citizen’s number, race, sex, whether or not the citizen was injured, the citizen’s condition, whether or not the citizen was arrested, the CIT_INFL_A, which appears to be the citizen’s demeanor, what the citizen was charged with, the council district, RA, beat, sector, division, and location data.
I focused on the racial and sexual demographics of the officers and citizens, and cleaned the data relating to those categories. I first filtered the data to determine how many values were missing or unknown. Once I determined there were very few relative to the size of the dataset, I removed those using base-R’s subset function.
As I explored the data, I discovered more questions that the data raised. I filtered the data to focus in on questions of interest to me. First, I selected the variables I thought would be of most interest, and extracted those columns from the data into a new dataset. I then looked at the rows with NAs, nulls, or unknowns; counted them; and determined that there were few enough that I could remove them without damaging the dataset. I subsetted the data again, removing those 43 cases, and creating a new, “cleaner” dataset.
As I constructed visualizations, I developed a few more questions, which led me to clean the data some more. In particular, I needed to clean the data to work with the injured-citizen data. The CIT_INJURE column was a boolean, which enabled me to easily filter out the injured citizens into a new dataset by simply selecting the cases where the column was “TRUE.” Determining what type of injuries were most common required counting the incidents in the CitCondTyp column (in the new dataset of injured citizens), and then plotting the most common injuries.
The visualizations revealed that in 2019 use of force incidents, the majority of the police involved were white and male. The majority of citizens against whom force was used were black and male. Police used force more often against black females than against white females, but far more often against black males than black females. That plot also indicated that police used force against white males more often than against black females. After a speaker at the Town Hall event discussed how she had not had unpleasant interactions with police until she had sons, I was interested in seeing the relationship between race and sex. This data does show that black men are more likely to be involved in use of force incidents. Not surprisingly, given that more white officers used force in general, the race of the officers who most often used force against black citizens was white. Black officers also used force more often against black citizens than against white citizens.
Most uses of force did not result in injury. Blacks were most common among those injured. Blacks were most common among those not injured as well. This is a function of most uses of force being against black people in 2019. Most injuries appeared to be relatively minor (abrasions, bruises), though there was no variable indicating whether or not the citizen was hospitalized. (There was a variable indicating whether or not the officer was hospitalized.) Six citizens were shot in a use of force incident in 2019 in Dallas. The bar plot of those injuries reveals they were all black males. Another bar plot showed that all of the officers involved were male; three were white, one black, one asian, and one hispanic. Black men were also more likely to suffer from taser burns, and white officers were most likely to do the tasing.
The barplot of hire dates of the officers involved in uses of force does not reveal anything particularly interesting. I hoped it would show something about the experience level of the officers, but they seem fairly distributed between recent and earlier hires. Adding the fill of race, however, does raise more questions, especially about the earlier hires who were predominantly white.
While this data provided a great deal of interesting information, it left a large number of questions unanswered. What is the demographic breakdown of the Dallas police department in general, and of the Dallas population in general? For example, are the officers involved in use of force incidents proportional to the police department population, or are white male police officers more likely to be involved in use of force incidents? Similarly, how does the population of citizens against whom force is used compare with the population in general? If we knew that information, we would have a better idea if black men are disproportionately faced with police force.