# Download the cleaned Philadelphia file from the Financial Times
# police-misconduct-complaints-analysis repository and keep only the
# officer-level columns (id, race, sex) used in the rest of the script.
x <- select(
  read_csv("https://raw.githubusercontent.com/Financial-Times/police-misconduct-complaints-analysis/main/output/philly_clean.csv"),
  officer_id, po_race, po_sex
)
##
## -- Column specification --------------------------------------------------------
## cols(
## complaint_id = col_character(),
## date_received = col_date(format = ""),
## district_occurrence = col_character(),
## general_cap_classification = col_character(),
## summary = col_character(),
## officer_id = col_double(),
## po_race = col_character(),
## po_sex = col_character(),
## po_assigned_unit = col_character(),
## allegations_investigated = col_character(),
## investigative_findings = col_character(),
## disciplinary_findings = col_character()
## )
# Number of rows per officer, largest first. The id is converted to text so
# the printed tibble shows it as a string rather than a number.
x %>%
  count(officer_id, name = "total", sort = TRUE) %>%
  mutate(officer_id = as.character(officer_id))
## # A tibble: 3,360 x 2
## officer_id total
## <chr> <int>
## 1 29180642 39
## 2 40251428 27
## 3 65283090 25
## 4 47524892 24
## 5 66221391 24
## 6 47882806 23
## 7 91568567 23
## 8 90705110 22
## 9 64290544 21
## 10 23622762 20
## # ... with 3,350 more rows
# Share of all rows attributable to each decile of officers:
#   1. tally rows per officer,
#   2. assign each officer to one of 10 equal-sized buckets ordered by tally,
#   3. add up the tallies within each bucket,
#   4. express each bucket's sum as a fraction of the grand total.
decile_data <- x %>%
  count(officer_id, name = "total") %>%
  mutate(compl_dec = ntile(total, 10)) %>%
  group_by(compl_dec) %>%
  summarise(compl_total = sum(total)) %>%
  mutate(compl_percentage = compl_total / sum(compl_total))
# Bar chart of the share of complaints (compl_percentage) accounted for by
# each officer bucket (compl_dec) in decile_data. The buckets come from
# ntile(total, 10), so they are deciles, and the labels say so.
ggplot(
  data = decile_data,
  mapping = aes(x = compl_dec, y = compl_percentage)
) +
  geom_col() +
  # compl_dec is an integer bucket index; put a tick on every bucket instead
  # of the default continuous breaks, which land on fractional values.
  scale_x_continuous(breaks = seq_len(10)) +
  # compl_percentage is stored as a fraction; render it as a percentage.
  scale_y_continuous(labels = scales::percent_format()) +
  theme_bw() +
  labs(
    title = "Percentage of Complaints Received by Philadelphia Police, sorted by decile",
    subtitle = "The upper decile contains a large portion of total complaints",
    x = "Decile",
    y = "Complaint Percentage"
  )
