The dataset relates to hate crimes and includes various columns with information about incidents, agencies, locations, offenders, victims, and the nature of the crimes.
The goal of this project is to create visualizations that communicate the relationships among some of the variables and the distributions of others.
# Package setup ----
# Install the tidyverse only when it is missing. `requireNamespace()` tests for
# the same package that gets installed, without attaching anything, and no
# install is attempted for packages that are already available.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Explicit declarations of the two packages the script relies on most; they
# are installed as part of the tidyverse, so no separate install step is needed.
library(dplyr)
library(ggplot2)
# Load data and recode offence names ----
# Read the raw file and draw a reproducible random sample of 240,000 rows.
data <- read.csv("hate_crime.csv")
set.seed(123)
sampled_data <- sample_n(data, 240000)

# Short display labels for the long offence names. Only exact matches are
# replaced; every other value (including combined offences not listed here)
# keeps its original name.
offense_labels <- c(
  "Destruction/Damage/Vandalism of Property" = "Damage",
  "Simple Assault" = "Sim.Assault",
  "Aggravated Assault" = "Agg.Assault",
  "Destruction/Damage/Vandalism of Property;Intimidation" = "Damage&Int",
  "Burglary/Breaking & Entering" = "Burglary",
  "All Other Larceny" = "Larceny",
  "Drug/Narcotic Violations" = "Drug"
)
# Look each name up in the table; names without an entry come back as NA and
# fall through to the original value via coalesce().
sampled_data <- mutate(
  sampled_data,
  offense_name = coalesce(unname(offense_labels[offense_name]), offense_name)
)

# Number of distinct offence names after recoding.
n_distinct(sampled_data$offense_name)
## [1] 398

# Frequency table of offences, most frequent first.
offense_frequency <- sampled_data %>%
  count(offense_name, name = "frequency", sort = TRUE) %>%
  as_tibble()
view(offense_frequency)

# Sanity check: the frequencies add up to the sample size.
sum(offense_frequency$frequency)
## [1] 240000
# Total incidents per year ----
# One row of `sampled_data` is one incident, so the yearly total is a row count.
incidents_per_year <- sampled_data %>%
  count(data_year, name = "total_incidents") %>%
  as_tibble()

# Black line with blue markers showing the yearly totals.
trend1 <- ggplot(
  incidents_per_year,
  aes(x = data_year, y = total_incidents)
) +
  geom_line(color = "black") +
  geom_point(color = "blue") +
  theme_minimal() +
  ggtitle("Total Incidents over Years") +
  xlab("Years") +
  ylab("Total Incidents")
trend1
# Total victims per year ----
# Sum the per-incident `victim_count` within each year. Counting rows with
# n() would give the number of incidents, not victims, and would simply
# reproduce the incidents-per-year chart.
# NOTE(review): if the data set has a dedicated column for individual victims,
# sum that column instead - confirm against the data dictionary.
victims_per_year <- sampled_data %>%
  group_by(data_year) %>%
  summarize(total_individual_victims = sum(victim_count, na.rm = TRUE))

# Black line with blue markers showing the yearly victim totals.
trend2 <- ggplot(
  victims_per_year,
  aes(x = data_year, y = total_individual_victims)
) +
  geom_line(color = "black") +
  geom_point(color = "blue") +
  labs(title = "Total Victims over Years", x = "Years", y = "Total Victims") +
  theme_minimal()
trend2
# Top-10 offences in selected years ----
# The ten offences (using the recoded short labels) compared in every chart.
top_offences <- c(
  "Intimidation", "Robbery", "Agg.Assault", "Sim.Assault", "Damage",
  "Arson", "Damage&Int", "Larceny", "Drug", "Burglary"
)

# Rows of `sampled_data` for one year, restricted to the top-10 offences.
# filter() drops rows whose condition is NA, so a missing `data_year` cannot
# produce all-NA rows the way logical `[` subsetting does. `!!` injects the
# function-local values so they cannot be confused with data columns.
top_offence_rows <- function(year) {
  filter(
    sampled_data,
    data_year == !!year,
    offense_name %in% !!top_offences
  )
}

# Bar chart of incident counts per offence for one year. Title and axis label
# are generated from `year` so all charts are labelled consistently.
top_offence_bar <- function(df, year) {
  ggplot(df, aes(offense_name)) +
    geom_bar() +
    labs(
      title = paste("Distribution of The Top 10 Offences in", year),
      x = paste("Year:", year),
      y = "Frequency"
    ) +
    theme_minimal()
}

var1991 <- top_offence_rows(1991)
bar_var1991 <- top_offence_bar(var1991, 1991)
bar_var1991

var1995 <- top_offence_rows(1995)
bar_var1995 <- top_offence_bar(var1995, 1995)
bar_var1995

var2000 <- top_offence_rows(2000)
bar_var2000 <- top_offence_bar(var2000, 2000)
bar_var2000

var2005 <- top_offence_rows(2005)
bar_var2005 <- top_offence_bar(var2005, 2005)
bar_var2005

var2010 <- top_offence_rows(2010)
bar_var2010 <- top_offence_bar(var2010, 2010)
bar_var2010

var2015 <- top_offence_rows(2015)
bar_var2015 <- top_offence_bar(var2015, 2015)
bar_var2015

var2020 <- top_offence_rows(2020)
bar_var2020 <- top_offence_bar(var2020, 2020)
bar_var2020

var2022 <- top_offence_rows(2022)
bar_var2022 <- top_offence_bar(var2022, 2022)
bar_var2022
# Per-offence analysis ----
# For each of the ten recoded offences this section draws
#   1. a histogram of incidents by year,
#   2. a line/point trend of the yearly incident counts, and
#   3. bar charts of incidents by region for selected years.
# The plots are built by the helpers below so that every offence is filtered,
# labelled and themed the same way. All object names used by the original
# script are still defined, except that the Burglary region plots now get
# their own `bar_burreg*` names instead of overwriting the Robbery plots
# (`bar_robreg*`).

# Rows of `sampled_data` whose recoded offence name equals `offence`.
# `!!` injects the argument value so it cannot be mistaken for a data column.
offence_rows <- function(offence) {
  filter(sampled_data, offense_name == !!offence)
}

# Histogram of incidents by year, one bin per year. The x axis shows
# `data_year`, so it is labelled "Year".
year_histogram <- function(df, title) {
  ggplot(df, aes(data_year)) +
    geom_histogram(binwidth = 1, fill = "grey", color = "black") +
    labs(title = title, x = "Year", y = "Frequency") +
    theme_minimal()
}

# Incident count per year, stored in a column named by `count_col`.
incidents_by_year <- function(df, count_col) {
  df %>%
    group_by(data_year) %>%
    summarize("{count_col}" := n())
}

# Black line with blue markers of the yearly counts in column `count_col`.
year_trend <- function(per_year, count_col, title, y_label) {
  ggplot(per_year, aes(x = data_year, y = .data[[count_col]])) +
    geom_line(color = "black") +
    geom_point(color = "blue") +
    labs(title = title, x = "Years", y = y_label) +
    theme_minimal()
}

# Rows of one offence's data for a single year. The year is compared as a
# number, matching how `data_year` is used on the numeric plot axes.
year_rows <- function(df, year) {
  filter(df, data_year == !!year)
}

# Bar chart of incident counts by region for one offence and year.
region_bar <- function(df, offence_label, year) {
  ggplot(df, aes(region_name)) +
    geom_bar(fill = "grey", color = "black") +
    labs(
      title = paste0(
        "Distribution of ", offence_label, " By Region in ", year
      ),
      x = "Region",
      y = "Frequency"
    ) +
    theme_minimal()
}

# Damage ----
damage <- offence_rows("Damage")
hist_damage <- year_histogram(damage, "Distribution of Damage")
hist_damage
damage_per_year <- incidents_by_year(damage, "damage")
damage_trend <- year_trend(
  damage_per_year, "damage",
  "Trend for Damage over the Years", "Total Damages"
)
damage_trend
damreg1991 <- year_rows(damage, 1991)
bar_damreg1991 <- region_bar(damreg1991, "Damage", 1991)
bar_damreg1991
damreg2000 <- year_rows(damage, 2000)
bar_damreg2000 <- region_bar(damreg2000, "Damage", 2000)
bar_damreg2000
damreg2010 <- year_rows(damage, 2010)
bar_damreg2010 <- region_bar(damreg2010, "Damage", 2010)
bar_damreg2010
damreg2020 <- year_rows(damage, 2020)
bar_damreg2020 <- region_bar(damreg2020, "Damage", 2020)
bar_damreg2020
damreg2022 <- year_rows(damage, 2022)
bar_damreg2022 <- region_bar(damreg2022, "Damage", 2022)
bar_damreg2022

# Intimidation ----
intimidations <- offence_rows("Intimidation")
hist_intimidations <- year_histogram(
  intimidations, "Distribution of Intimidation over the Years"
)
hist_intimidations
# Same rows, kept under the second name the original script defined.
intimidate <- intimidations
intimidate_per_year <- incidents_by_year(intimidate, "intimidate")
intimidate_trend <- year_trend(
  intimidate_per_year, "intimidate",
  "Trend for Intimidation over the Years", "Total Intimidations"
)
intimidate_trend
reg1991 <- year_rows(intimidations, 1991)
bar_reg1991 <- region_bar(reg1991, "Intimidation", 1991)
bar_reg1991
reg2000 <- year_rows(intimidations, 2000)
bar_reg2000 <- region_bar(reg2000, "Intimidation", 2000)
bar_reg2000
reg2010 <- year_rows(intimidations, 2010)
bar_reg2010 <- region_bar(reg2010, "Intimidation", 2010)
bar_reg2010
reg2020 <- year_rows(intimidations, 2020)
bar_reg2020 <- region_bar(reg2020, "Intimidation", 2020)
bar_reg2020
reg2022 <- year_rows(intimidations, 2022)
bar_reg2022 <- region_bar(reg2022, "Intimidation", 2022)
bar_reg2022

# Simple assault ----
simple_assault <- offence_rows("Sim.Assault")
hist_simple_assault <- year_histogram(
  simple_assault, "Distribution of Simple Assault"
)
hist_simple_assault
# Same rows, kept under the second name the original script defined.
simp.assault <- simple_assault
simp.assault_per_year <- incidents_by_year(simp.assault, "simp.assault")
simp.assault_trend <- year_trend(
  simp.assault_per_year, "simp.assault",
  "Trend for Simple Assaults over the Years", "Total Simple Assaults"
)
simp.assault_trend
simreg1991 <- year_rows(simple_assault, 1991)
bar_simreg1991 <- region_bar(simreg1991, "Simple Assault", 1991)
bar_simreg1991
simreg2000 <- year_rows(simple_assault, 2000)
bar_simreg2000 <- region_bar(simreg2000, "Simple Assault", 2000)
bar_simreg2000
simreg2010 <- year_rows(simple_assault, 2010)
bar_simreg2010 <- region_bar(simreg2010, "Simple Assault", 2010)
bar_simreg2010
simreg2020 <- year_rows(simple_assault, 2020)
bar_simreg2020 <- region_bar(simreg2020, "Simple Assault", 2020)
bar_simreg2020
simreg2022 <- year_rows(simple_assault, 2022)
bar_simreg2022 <- region_bar(simreg2022, "Simple Assault", 2022)
bar_simreg2022

# Aggravated assault ----
# Labels use "Aggravated", the name of the offence in the data.
aggravated_assault <- offence_rows("Agg.Assault")
hist_aggravated_assault <- year_histogram(
  aggravated_assault, "Distribution of Aggravated Assault"
)
hist_aggravated_assault
# Same rows, kept under the second name the original script defined.
agg.assault <- aggravated_assault
agg.assault_per_year <- incidents_by_year(agg.assault, "agg.assault")
agg.assault_trend <- year_trend(
  agg.assault_per_year, "agg.assault",
  "Trend for Aggravated Assaults over the Years",
  "Total Aggravated Assaults"
)
agg.assault_trend
aggreg1991 <- year_rows(aggravated_assault, 1991)
bar_aggreg1991 <- region_bar(aggreg1991, "Aggravated Assault", 1991)
bar_aggreg1991
aggreg2000 <- year_rows(aggravated_assault, 2000)
bar_aggreg2000 <- region_bar(aggreg2000, "Aggravated Assault", 2000)
bar_aggreg2000
aggreg2010 <- year_rows(aggravated_assault, 2010)
bar_aggreg2010 <- region_bar(aggreg2010, "Aggravated Assault", 2010)
bar_aggreg2010
aggreg2020 <- year_rows(aggravated_assault, 2020)
bar_aggreg2020 <- region_bar(aggreg2020, "Aggravated Assault", 2020)
bar_aggreg2020
aggreg2022 <- year_rows(aggravated_assault, 2022)
bar_aggreg2022 <- region_bar(aggreg2022, "Aggravated Assault", 2022)
bar_aggreg2022

# Robbery ----
robb <- offence_rows("Robbery")
hist_robb <- year_histogram(robb, "Distribution of Robbery")
hist_robb
# Same rows, kept under the second name the original script defined.
robbery <- robb
robbery_per_year <- incidents_by_year(robbery, "robbery")
robbery_trend <- year_trend(
  robbery_per_year, "robbery",
  "Trend for Robbery over the Years", "Total Robbery"
)
robbery_trend
robreg1991 <- year_rows(robb, 1991)
bar_robreg1991 <- region_bar(robreg1991, "Robbery", 1991)
bar_robreg1991
robreg2000 <- year_rows(robb, 2000)
bar_robreg2000 <- region_bar(robreg2000, "Robbery", 2000)
bar_robreg2000
robreg2010 <- year_rows(robb, 2010)
bar_robreg2010 <- region_bar(robreg2010, "Robbery", 2010)
bar_robreg2010
robreg2020 <- year_rows(robb, 2020)
bar_robreg2020 <- region_bar(robreg2020, "Robbery", 2020)
bar_robreg2020
robreg2022 <- year_rows(robb, 2022)
bar_robreg2022 <- region_bar(robreg2022, "Robbery", 2022)
bar_robreg2022

# Burglary ----
Burglary <- offence_rows("Burglary")
hist_Burglary <- year_histogram(Burglary, "Distribution of Burglary")
hist_Burglary
# Same rows, kept under the second name the original script defined.
burg <- Burglary
burg_per_year <- incidents_by_year(burg, "burg")
burg_trend <- year_trend(
  burg_per_year, "burg",
  "Trend for Burglary over the Years", "Total Burglary"
)
burg_trend
# Every Burglary plot is stored as `bar_burreg<year>`, so the Robbery plots
# are left intact and the plot printed for each year is the Burglary one.
burreg1991 <- year_rows(Burglary, 1991)
bar_burreg1991 <- region_bar(burreg1991, "Burglary", 1991)
bar_burreg1991
burreg2000 <- year_rows(Burglary, 2000)
bar_burreg2000 <- region_bar(burreg2000, "Burglary", 2000)
bar_burreg2000
burreg2010 <- year_rows(Burglary, 2010)
bar_burreg2010 <- region_bar(burreg2010, "Burglary", 2010)
bar_burreg2010
burreg2020 <- year_rows(Burglary, 2020)
bar_burreg2020 <- region_bar(burreg2020, "Burglary", 2020)
bar_burreg2020
burreg2022 <- year_rows(Burglary, 2022)
bar_burreg2022 <- region_bar(burreg2022, "Burglary", 2022)
bar_burreg2022

# Larceny ----
# Region charts start at 2000, as in the original script (no 1991 chart).
Larceny <- offence_rows("Larceny")
hist_Larceny <- year_histogram(Larceny, "Distribution of Larceny")
hist_Larceny
# Same rows, kept under the second name the original script defined.
larc <- Larceny
larc_per_year <- incidents_by_year(larc, "larc")
larc_trend <- year_trend(
  larc_per_year, "larc",
  "Trend for Larceny over the Years", "Total Larceny"
)
larc_trend
larreg2000 <- year_rows(Larceny, 2000)
bar_larreg2000 <- region_bar(larreg2000, "Larceny", 2000)
bar_larreg2000
larreg2010 <- year_rows(Larceny, 2010)
bar_larreg2010 <- region_bar(larreg2010, "Larceny", 2010)
bar_larreg2010
larreg2020 <- year_rows(Larceny, 2020)
bar_larreg2020 <- region_bar(larreg2020, "Larceny", 2020)
bar_larreg2020
larreg2022 <- year_rows(Larceny, 2022)
bar_larreg2022 <- region_bar(larreg2022, "Larceny", 2022)
bar_larreg2022

# Damage & Intimidation ----
DI <- offence_rows("Damage&Int")
hist_DI <- year_histogram(DI, "Distribution of Damage&Intimidation")
hist_DI
# Same rows, kept under the second name the original script defined.
DamInt <- DI
DamInt_per_year <- incidents_by_year(DamInt, "DamInt")
DamInt_trend <- year_trend(
  DamInt_per_year, "DamInt",
  "Trend for Damage&Intimidation over the Years",
  "Total Damage&Intimidation"
)
DamInt_trend
direg1991 <- year_rows(DI, 1991)
bar_direg1991 <- region_bar(direg1991, "Damage & Intimidation", 1991)
bar_direg1991
direg2000 <- year_rows(DI, 2000)
bar_direg2000 <- region_bar(direg2000, "Damage & Intimidation", 2000)
bar_direg2000
direg2010 <- year_rows(DI, 2010)
bar_direg2010 <- region_bar(direg2010, "Damage & Intimidation", 2010)
bar_direg2010
direg2020 <- year_rows(DI, 2020)
bar_direg2020 <- region_bar(direg2020, "Damage & Intimidation", 2020)
bar_direg2020
direg2022 <- year_rows(DI, 2022)
bar_direg2022 <- region_bar(direg2022, "Damage & Intimidation", 2022)
bar_direg2022

# Arson ----
# `ars` holds the data; `Arson` is the histogram, as it was at the end of the
# original section (where the plot overwrote the data of the same name).
ars <- offence_rows("Arson")
Arson <- year_histogram(ars, "Distribution of Arson")
Arson
ars_per_year <- incidents_by_year(ars, "ars")
ars_trend <- year_trend(
  ars_per_year, "ars",
  "Trend for Arson over the Years", "Total Arson"
)
ars_trend
arsreg1991 <- year_rows(ars, 1991)
bar_arsreg1991 <- region_bar(arsreg1991, "Arson", 1991)
bar_arsreg1991
arsreg2000 <- year_rows(ars, 2000)
bar_arsreg2000 <- region_bar(arsreg2000, "Arson", 2000)
bar_arsreg2000
arsreg2010 <- year_rows(ars, 2010)
bar_arsreg2010 <- region_bar(arsreg2010, "Arson", 2010)
bar_arsreg2010
arsreg2020 <- year_rows(ars, 2020)
bar_arsreg2020 <- region_bar(arsreg2020, "Arson", 2020)
bar_arsreg2020
arsreg2022 <- year_rows(ars, 2022)
bar_arsreg2022 <- region_bar(arsreg2022, "Arson", 2022)
bar_arsreg2022

# Drug ----
# `dg` holds the data; `drugs` is the histogram, as it was at the end of the
# original section. Region charts start at 2000 (no 1991 chart).
dg <- offence_rows("Drug")
drugs <- year_histogram(dg, "Distribution of Drug")
drugs
dg_per_year <- incidents_by_year(dg, "dg")
dg_trend <- year_trend(
  dg_per_year, "dg",
  "Trend for Drug over the Years", "Total Drugs"
)
dg_trend
drureg2000 <- year_rows(dg, 2000)
bar_drureg2000 <- region_bar(drureg2000, "Drugs", 2000)
bar_drureg2000
drureg2010 <- year_rows(dg, 2010)
bar_drureg2010 <- region_bar(drureg2010, "Drugs", 2010)
bar_drureg2010
drureg2020 <- year_rows(dg, 2020)
bar_drureg2020 <- region_bar(drureg2020, "Drugs", 2020)
bar_drureg2020
drureg2022 <- year_rows(dg, 2022)
bar_drureg2022 <- region_bar(drureg2022, "Drugs", 2022)
bar_drureg2022
# Victims vs offenders ----
# First view: offenders on x, victims on y. The axis labels now match the
# mapped variables (they were swapped). `fill` is omitted because the default
# point shape has no fill, so only `color` affects the markers.
scatter1 <- ggplot(
  sampled_data,
  aes(x = total_offender_count, y = victim_count)
) +
  geom_point(color = "black") +
  labs(
    title = "Relationship between Victims and Offenders",
    x = "Offender Count",
    y = "Victim Count"
  ) +
  theme_minimal()
scatter1

# Second view: victims on x, offenders on y, with a fitted straight line
# (linear model, no confidence band). This replaces `scatter1`.
scatter1 <- ggplot(
  sampled_data,
  aes(x = victim_count, y = total_offender_count)
) +
  geom_point(color = "black") +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  labs(
    title = "Relationship between Offenders and Victims",
    x = "Victim Count",
    y = "Offender Count"
  ) +
  theme_minimal()
scatter1
## `geom_smooth()` using formula = 'y ~ x'
# Offence counts in selected years ----
# For each offence, compare the number of incidents in four reference years.
selected_years <- c(1991, 2000, 2010, 2020)

# Year and offence name of the rows for one offence in the selected years.
# `!!` injects the local values so they cannot be mistaken for data columns.
selected_year_rows <- function(offence) {
  sampled_data %>%
    filter(data_year %in% !!selected_years, offense_name == !!offence) %>%
    select(data_year, offense_name)
}

# Bar chart of incident counts per selected year. The year is mapped as a
# factor so the four years are evenly spaced, full-width bars rather than
# thin bars scattered along a continuous 1991-2020 axis.
selected_year_bar <- function(df, title) {
  ggplot(df, aes(x = factor(data_year))) +
    geom_bar() +
    labs(title = title, x = "Year", y = "Frequency") +
    theme_minimal()
}

dam_years <- selected_year_rows("Damage")
bar_damyear <- selected_year_bar(dam_years, "Distribution of Damage")
bar_damyear

int_years <- selected_year_rows("Intimidation")
bar_intyear <- selected_year_bar(int_years, "Distribution of Intimidation")
bar_intyear

sim_years <- selected_year_rows("Sim.Assault")
bar_simyear <- selected_year_bar(sim_years, "Distribution of Simple Assault")
bar_simyear

agg_years <- selected_year_rows("Agg.Assault")
bar_aggyear <- selected_year_bar(
  agg_years, "Distribution of Aggravated Assault"
)
bar_aggyear

rob_years <- selected_year_rows("Robbery")
bar_robyear <- selected_year_bar(rob_years, "Distribution of Robbery")
bar_robyear

burg_years <- selected_year_rows("Burglary")
bar_burgyear <- selected_year_bar(burg_years, "Distribution of Burglary")
bar_burgyear

# Larceny is plotted once (the original repeated this chart verbatim).
larc_years <- selected_year_rows("Larceny")
bar_larcyear <- selected_year_bar(larc_years, "Distribution of Larceny")
bar_larcyear

# Damage & Intimidation gets its own object names so it no longer overwrites
# the Damage data and plot (`dam_years`, `bar_damyear`) defined above.
damint_years <- selected_year_rows("Damage&Int")
bar_damintyear <- selected_year_bar(
  damint_years, "Distribution of Damage & Intimidation"
)
bar_damintyear

ars_years <- selected_year_rows("Arson")
bar_arsyear <- selected_year_bar(ars_years, "Distribution of Arson")
bar_arsyear

drug_years <- selected_year_rows("Drug")
bar_drugyear <- selected_year_bar(drug_years, "Distribution of Drug")
bar_drugyear