Here I’m looking at the crime statistics gathered by the South Australian Police (SAPOL) which has been made available here https://data.sa.gov.au/data/dataset/crime-statistics
The data have been downloaded and merged together into a single data set, with a small amount of data clean up and conditioning. These have been saved into a RDS file.
# load the data from the rds file
crime_stats_tbl <- read_rds("crime_stats.rds")
# review the data
crime_stats_tbl %>% glimpse()
## Rows: 986,612
## Columns: 7
## $ reported_date <date> 2010-07-01, 2010-07-01, 2010-07-01, 2010-…
## $ suburb_incident <chr> "ADELAIDE", "ADELAIDE", "ADELAIDE", "ADELA…
## $ postcode_incident <chr> "5000", "5000", "5000", "5000", "5000", "5…
## $ offence_level_1_description <chr> "OFFENCES AGAINST PROPERTY", "OFFENCES AGA…
## $ offence_level_2_description <chr> "FRAUD DECEPTION AND RELATED OFFENCES", "P…
## $ offence_level_3_description <chr> "Obtain benefit by deception", "Other prop…
## $ offence_count <dbl> 2, 2, 1, 6, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, …
So there are offence counts per date and different levels of offences, and the data is arranged by suburb and postcode. It looks like offences are broadly categorised into being against property or against people, then it breaks down from there.
Let’s have a look at the start and end dates of the data and the overall number
crime_stats_tbl %>%
summarise(
total_records = n(),
start_date = min(reported_date, na.rm = TRUE),
end_date = max(reported_date, na.rm = TRUE)
)
## # A tibble: 1 x 3
## total_records start_date end_date
## <int> <date> <date>
## 1 986612 2010-07-01 2020-12-31
We can look overall at the counts of crimes
crime_stats_tbl %>%
group_by(offence_level_1_description, offence_level_2_description, offence_level_3_description) %>%
summarise(
total_offences = sum(offence_count)
) %>%
arrange(desc(total_offences)) %>%
head(5)
## `summarise()` has grouped output by 'offence_level_1_description', 'offence_level_2_description'. You can override using the `.groups` argument.
## # A tibble: 5 x 4
## # Groups: offence_level_1_description, offence_level_2_description [3]
## offence_level_1_de… offence_level_2_desc… offence_level_3_desc… total_offences
## <chr> <chr> <chr> <dbl>
## 1 OFFENCES AGAINST P… THEFT AND RELATED OF… Other theft 224421
## 2 OFFENCES AGAINST P… PROPERTY DAMAGE AND … Other property damag… 211247
## 3 OFFENCES AGAINST P… THEFT AND RELATED OF… Theft from motor veh… 104488
## 4 OFFENCES AGAINST P… SERIOUS CRIMINAL TRE… SCT - Residence 87047
## 5 OFFENCES AGAINST P… THEFT AND RELATED OF… Theft from shop 83920
What are the top ten offences?
crime_stats_tbl %>%
group_by(offence_level_3_description) %>%
summarise(
total_offences = sum(offence_count)
) %>%
ungroup() %>%
arrange(desc(total_offences)) %>%
head(10) %>%
ggplot(aes(fct_reorder(offence_level_3_description, total_offences), total_offences)) +
geom_col() +
coord_flip() +
labs(
title = "Top 10 offences by count 2010 to 2020 for South Australia, SAPOL data",
x = "Offence",
y = "Total Offence Count"
)
# get the top 10 offences
top_10_offences <- crime_stats_tbl %>%
group_by(offence_level_3_description) %>%
summarise(
total_offences = sum(offence_count)
) %>%
ungroup() %>%
arrange(desc(total_offences)) %>%
head(10) %>%
pull(offence_level_3_description)
crime_stats_tbl %>%
filter(offence_level_3_description %in% top_10_offences) %>%
mutate(
month_date = ceiling_date(reported_date, "month")
) %>%
group_by(month_date, offence_level_3_description) %>%
summarise(
monthly_offence_count = sum(offence_count)
) %>%
ungroup() %>%
ggplot(aes(month_date, monthly_offence_count)) +
geom_line(aes(colour=offence_level_3_description)) +
labs(
title="Monthly counts of top 10 offences over time for South Australian 2010-2020",
subtitle = "Note: Y axis varies",
x = "Date",
y = "Monthly Offence Count",
colour= "Offence"
)
## `summarise()` has grouped output by 'month_date'. You can override using the `.groups` argument.
See how theft and releated offences behaves
crime_stats_tbl %>%
filter(offence_level_2_description == "THEFT AND RELATED OFFENCES") %>%
distinct(offence_level_3_description)
## # A tibble: 5 x 1
## offence_level_3_description
## <chr>
## 1 Other theft
## 2 Theft from shop
## 3 Theft from motor vehicle
## 4 Theft/Illegal Use of MV
## 5 Receive or handle proceeds of crime
Look at theft offences over time
library(lubridate)
crime_stats_tbl %>%
filter(offence_level_2_description == "THEFT AND RELATED OFFENCES") %>%
mutate(
month_date = ceiling_date(reported_date, "month")
) %>%
group_by(offence_level_3_description, month_date) %>%
summarise(total_offence_count = sum(offence_count)) %>%
ungroup() %>%
ggplot(aes(month_date, total_offence_count)) +
geom_line(aes(colour=offence_level_3_description)) +
theme(
legend.position = "bottom"
) +
labs(
title = "Total Monthly Counts of Theft and Related Offences for South Australia",
x = "Date",
y = "Total Offences per Month",
colour = "Offence Description"
)
## `summarise()` has grouped output by 'offence_level_3_description'. You can override using the `.groups` argument.
Now for serious criminal trespass
crime_stats_tbl %>%
filter(offence_level_2_description == "SERIOUS CRIMINAL TRESPASS") %>%
distinct(offence_level_3_description)
## # A tibble: 3 x 1
## offence_level_3_description
## <chr>
## 1 SCT - Non Residence
## 2 SCT - Residence
## 3 Other unlawful entry with intent
Look at SCT over time
crime_stats_tbl %>%
filter(offence_level_2_description == "SERIOUS CRIMINAL TRESPASS") %>%
mutate(
month_date = ceiling_date(reported_date, "month")
) %>%
group_by(offence_level_3_description, month_date) %>%
summarise(total_offence_count = sum(offence_count)) %>%
ungroup() %>%
ggplot(aes(month_date, total_offence_count)) +
geom_line(aes(colour=offence_level_3_description)) +
theme(
legend.position = "bottom"
) +
labs(
title = "Total Monthly Counts of Serious Criminal Tresspass Offences for South Australia",
x = "Date",
y = "Total Offences per Month",
colour = "Offence Description"
)
## `summarise()` has grouped output by 'offence_level_3_description'. You can override using the `.groups` argument.
Check out that drop off due to COVID!