The dataset relates to hate crimes and includes various columns with information about incidents, agencies, locations, offenders, victims, and the nature of the crimes.

The goal of this project is to build visualizations that show how some of the variables relate to one another and how some of them are distributed.

# Install the tidyverse only when it is missing, then attach it. The check
# targets the same package that gets installed (the previous check tested for
# ggplot2 but installed tidyverse), and requireNamespace() is used so the
# test itself does not attach anything.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Install dplyr / ggplot2 only when they are missing. Calling
# install.packages() unconditionally hits the network on every run and, when
# the package is already loaded, only produces a "package is in use and will
# not be installed" warning.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# Read the raw records from the working directory.
data <- read.csv("hate_crime.csv")

# Fix the RNG seed so the same 240000-row random sample is drawn on every run.
set.seed(123)
sampled_data <- sample_n(data, 240000)

Data Manipulation

Shortening Some of the Offense Names

# Shorten the longest offense labels so they fit on plot axes. Names of this
# vector are the labels as they appear in the data; values are the short
# replacements. Any label not listed here is left untouched.
offense_short_names <- c(
  "Destruction/Damage/Vandalism of Property" = "Damage",
  "Simple Assault" = "Sim.Assault",
  "Aggravated Assault" = "Agg.Assault",
  "Destruction/Damage/Vandalism of Property;Intimidation" = "Damage&Int",
  "Burglary/Breaking & Entering" = "Burglary",
  "All Other Larceny" = "Larceny",
  "Drug/Narcotic Violations" = "Drug"
)

# One pass over the column: look the label up when it has a short form,
# otherwise keep it as is. unname() keeps the result a plain vector.
sampled_data <- sampled_data %>%
  mutate(
    offense_name = ifelse(
      offense_name %in% names(offense_short_names),
      unname(offense_short_names[offense_name]),
      offense_name
    )
  )

Data Exploration

# Number of distinct offense labels in the sample.
length(unique(sampled_data[["offense_name"]]))
## [1] 398
# Frequency table: one row per offense label, most frequent first.
offense_frequency <- arrange(
  summarise(group_by(sampled_data, offense_name), frequency = n()),
  desc(frequency)
)

# Open the table in the data viewer for inspection.
view(offense_frequency)

# Sanity check: the frequencies add up to the number of sampled rows.
sum(offense_frequency$frequency)
## [1] 240000

Data Visualization

# Number of sampled records in each reporting year.
incidents_per_year <- summarize(
  group_by(sampled_data, data_year),
  total_incidents = n()
)

# Yearly totals drawn as a black line with a blue point per year.
trend1 <- ggplot(
  data = incidents_per_year,
  mapping = aes(x = data_year, y = total_incidents)
) +
  geom_line(color = "black") +
  geom_point(color = "blue") +
  labs(
    title = "Total Incidents over Years",
    x = "Years",
    y = "Total Incidents"
  ) +
  theme_minimal()

trend1

# Total number of individual victims per reporting year.
#
# The previous version used n(), which counts rows (incidents), so this chart
# was an exact copy of the "Total Incidents" chart under a "Total Victims"
# title. Victims are now summed per year.
# NOTE(review): assumes the data has a numeric `total_individual_victims`
# column holding the per-incident victim count — confirm against the CSV.
victims_per_year <- sampled_data %>%
  group_by(data_year) %>%
  summarize(
    total_individual_victims = sum(total_individual_victims, na.rm = TRUE)
  )

# Yearly victim totals drawn as a black line with a blue point per year.
trend2 <- ggplot(
  victims_per_year,
  aes(x = data_year, y = total_individual_victims)
) +
  geom_line(color = "black") +
  geom_point(color = "blue") +
  labs(title = "Total Victims over Years", x = "Years", y = "Total Victims") +
  theme_minimal()

trend2

# Offense labels shown in the per-year "top 10" bar charts.
top_offences <- c(
  "Intimidation", "Robbery", "Agg.Assault", "Sim.Assault",
  "Damage", "Arson", "Damage&Int", "Larceny", "Drug", "Burglary"
)

# Rows of `sampled_data` for one reporting year, restricted to `top_offences`.
subset_top_offences <- function(year) {
  sampled_data[sampled_data$data_year == year &
                 sampled_data$offense_name %in% top_offences, ]
}

# Bar chart of how often each offense occurs in `offences`; `year` is only
# used to build the title and the x-axis label, so every chart is labelled
# the same way.
plot_top_offences <- function(offences, year) {
  ggplot(offences, aes(offense_name)) +
    geom_bar() +
    labs(
      title = paste("Distribution of The Top 10 Offences in", year),
      x = paste("Year:", year),
      y = "Frequency"
    ) +
    theme_minimal()
}

var1991 <- subset_top_offences(1991)
bar_var1991 <- plot_top_offences(var1991, 1991)
bar_var1991

var1995 <- subset_top_offences(1995)
bar_var1995 <- plot_top_offences(var1995, 1995)
bar_var1995

var2000 <- subset_top_offences(2000)
bar_var2000 <- plot_top_offences(var2000, 2000)
bar_var2000

var2005 <- subset_top_offences(2005)
bar_var2005 <- plot_top_offences(var2005, 2005)
bar_var2005

var2010 <- subset_top_offences(2010)
bar_var2010 <- plot_top_offences(var2010, 2010)
bar_var2010

var2015 <- subset_top_offences(2015)
bar_var2015 <- plot_top_offences(var2015, 2015)
bar_var2015

var2020 <- subset_top_offences(2020)
bar_var2020 <- plot_top_offences(var2020, 2020)
bar_var2020

var2022 <- subset_top_offences(2022)
bar_var2022 <- plot_top_offences(var2022, 2022)
bar_var2022

# Bar chart of the number of rows per region in `incidents`, with the given
# plot title.
plot_region_counts <- function(incidents, title) {
  ggplot(incidents, aes(region_name)) +
    geom_bar(fill = "grey", color = "black") +
    labs(title = title, x = "Region", y = "Frequency") +
    theme_minimal()
}

# All sampled records whose offense is "Damage" (filtered once and reused).
damage <- sampled_data %>%
  filter(offense_name == "Damage")

# Histogram of Damage records by year. The x-axis shows years, so it is
# labelled "Year" rather than with the offense name.
hist_damage <- ggplot(damage, aes(data_year)) +
  geom_histogram(binwidth = 1, fill = "grey", color = "black") +
  labs(title = "Distribution of Damage", x = "Year", y = "Frequency") +
  theme_minimal()

hist_damage

# Yearly Damage counts and their trend line.
damage_per_year <- damage %>%
  group_by(data_year) %>%
  summarize(damage = n())

damage_trend <- ggplot(damage_per_year, aes(x = data_year, y = damage)) +
  geom_line(color = "black") +
  geom_point(color = "blue") +
  labs(
    title = "Trend for Damage over the Years",
    x = "Years",
    y = "Total Damages"
  ) +
  theme_minimal()

damage_trend

# Regional breakdown of Damage for selected years.
damreg1991 <- damage %>% filter(data_year == 1991)
bar_damreg1991 <- plot_region_counts(
  damreg1991, "Distribution of Damage By Region in 1991"
)
bar_damreg1991

damreg2000 <- damage %>% filter(data_year == 2000)
bar_damreg2000 <- plot_region_counts(
  damreg2000, "Distribution of Damage By Region in 2000"
)
bar_damreg2000

damreg2010 <- damage %>% filter(data_year == 2010)
bar_damreg2010 <- plot_region_counts(
  damreg2010, "Distribution of Damage By Region in 2010"
)
bar_damreg2010

damreg2020 <- damage %>% filter(data_year == 2020)
bar_damreg2020 <- plot_region_counts(
  damreg2020, "Distribution of Damage By Region in 2020"
)
bar_damreg2020

damreg2022 <- damage %>% filter(data_year == 2022)
bar_damreg2022 <- plot_region_counts(
  damreg2022, "Distribution of Damage By Region in 2022"
)
bar_damreg2022

# Bar chart of the number of rows per region in `incidents`, with the given
# plot title.
plot_region_counts <- function(incidents, title) {
  ggplot(incidents, aes(region_name)) +
    geom_bar(fill = "grey", color = "black") +
    labs(title = title, x = "Region", y = "Frequency") +
    theme_minimal()
}

# All sampled records whose offense is "Intimidation". Both names are kept
# because each was defined separately before; they hold the same rows.
intimidations <- sampled_data %>%
  filter(offense_name == "Intimidation")
intimidate <- intimidations

# Histogram of Intimidation records by year. The x-axis shows years, so it is
# labelled "Year" rather than with the offense name.
hist_intimidations <- ggplot(intimidations, aes(data_year)) +
  geom_histogram(binwidth = 1, fill = "grey", color = "black") +
  labs(
    title = "Distribution of Intimidation over the Years",
    x = "Year",
    y = "Frequency"
  ) +
  theme_minimal()

hist_intimidations

# Yearly Intimidation counts and their trend line.
intimidate_per_year <- intimidate %>%
  group_by(data_year) %>%
  summarize(intimidate = n())

intimidate_trend <- ggplot(
  intimidate_per_year,
  aes(x = data_year, y = intimidate)
) +
  geom_line(color = "black") +
  geom_point(color = "blue") +
  labs(
    title = "Trend for Intimidation over the Years",
    x = "Years",
    y = "Total Intimidations"
  ) +
  theme_minimal()

intimidate_trend

# Regional breakdown of Intimidation for selected years.
reg1991 <- intimidations %>% filter(data_year == 1991)
bar_reg1991 <- plot_region_counts(
  reg1991, "Distribution of Intimidation By Region in 1991"
)
bar_reg1991

reg2000 <- intimidations %>% filter(data_year == 2000)
bar_reg2000 <- plot_region_counts(
  reg2000, "Distribution of Intimidation By Region in 2000"
)
bar_reg2000

reg2010 <- intimidations %>% filter(data_year == 2010)
bar_reg2010 <- plot_region_counts(
  reg2010, "Distribution of Intimidation By Region in 2010"
)
bar_reg2010

reg2020 <- intimidations %>% filter(data_year == 2020)
bar_reg2020 <- plot_region_counts(
  reg2020, "Distribution of Intimidation By Region in 2020"
)
bar_reg2020

reg2022 <- intimidations %>% filter(data_year == 2022)
bar_reg2022 <- plot_region_counts(
  reg2022, "Distribution of Intimidation By Region in 2022"
)
bar_reg2022

# Bar chart of the number of rows per region in `incidents`, with the given
# plot title.
plot_region_counts <- function(incidents, title) {
  ggplot(incidents, aes(region_name)) +
    geom_bar(fill = "grey", color = "black") +
    labs(title = title, x = "Region", y = "Frequency") +
    theme_minimal()
}

# All sampled records whose offense is "Sim.Assault". Both names are kept
# because each was defined separately before; they hold the same rows.
simple_assault <- sampled_data %>%
  filter(offense_name == "Sim.Assault")
simp.assault <- simple_assault

# Histogram of Simple Assault records by year. The x-axis shows years, so it
# is labelled "Year" rather than with the offense name.
hist_simple_assault <- ggplot(simple_assault, aes(data_year)) +
  geom_histogram(binwidth = 1, fill = "grey", color = "black") +
  labs(title = "Distribution of Simple Assault", x = "Year", y = "Frequency") +
  theme_minimal()

hist_simple_assault

# Yearly Simple Assault counts and their trend line.
simp.assault_per_year <- simp.assault %>%
  group_by(data_year) %>%
  summarize(simp.assault = n())

simp.assault_trend <- ggplot(
  simp.assault_per_year,
  aes(x = data_year, y = simp.assault)
) +
  geom_line(color = "black") +
  geom_point(color = "blue") +
  labs(
    title = "Trend for Simple Assaults over the Years",
    x = "Years",
    y = "Total Simple Assaults"
  ) +
  theme_minimal()

simp.assault_trend

# Regional breakdown of Simple Assault for selected years.
simreg1991 <- simple_assault %>% filter(data_year == 1991)
bar_simreg1991 <- plot_region_counts(
  simreg1991, "Distribution of Simple Assault By Region in 1991"
)
bar_simreg1991

simreg2000 <- simple_assault %>% filter(data_year == 2000)
bar_simreg2000 <- plot_region_counts(
  simreg2000, "Distribution of Simple Assault By Region in 2000"
)
bar_simreg2000

simreg2010 <- simple_assault %>% filter(data_year == 2010)
bar_simreg2010 <- plot_region_counts(
  simreg2010, "Distribution of Simple Assault By Region in 2010"
)
bar_simreg2010

simreg2020 <- simple_assault %>% filter(data_year == 2020)
bar_simreg2020 <- plot_region_counts(
  simreg2020, "Distribution of Simple Assault By Region in 2020"
)
bar_simreg2020

simreg2022 <- simple_assault %>% filter(data_year == 2022)
bar_simreg2022 <- plot_region_counts(
  simreg2022, "Distribution of Simple Assault By Region in 2022"
)
bar_simreg2022

# Bar chart of the number of rows per region in `incidents`, with the given
# plot title.
plot_region_counts <- function(incidents, title) {
  ggplot(incidents, aes(region_name)) +
    geom_bar(fill = "grey", color = "black") +
    labs(title = title, x = "Region", y = "Frequency") +
    theme_minimal()
}

# All sampled records whose offense is "Agg.Assault". Both names are kept
# because each was defined separately before; they hold the same rows.
aggravated_assault <- sampled_data %>%
  filter(offense_name == "Agg.Assault")
agg.assault <- aggravated_assault

# Histogram of Aggravated Assault records by year. The x-axis shows years, so
# it is labelled "Year" rather than with the offense name.
hist_aggravated_assault <- ggplot(aggravated_assault, aes(data_year)) +
  geom_histogram(binwidth = 1, fill = "grey", color = "black") +
  labs(
    title = "Distribution of Aggravated Assault",
    x = "Year",
    y = "Frequency"
  ) +
  theme_minimal()

hist_aggravated_assault

# Yearly Aggravated Assault counts and their trend line.
agg.assault_per_year <- agg.assault %>%
  group_by(data_year) %>%
  summarize(agg.assault = n())

# Labels say "Aggravated" (the offense's name); they previously read
# "Aggresive"/"Aggressive".
agg.assault_trend <- ggplot(
  agg.assault_per_year,
  aes(x = data_year, y = agg.assault)
) +
  geom_line(color = "black") +
  geom_point(color = "blue") +
  labs(
    title = "Trend for Aggravated Assaults over the Years",
    x = "Years",
    y = "Total Aggravated Assaults"
  ) +
  theme_minimal()

agg.assault_trend

# Regional breakdown of Aggravated Assault for selected years.
aggreg1991 <- aggravated_assault %>% filter(data_year == 1991)
bar_aggreg1991 <- plot_region_counts(
  aggreg1991, "Distribution of Aggravated Assault By Region in 1991"
)
bar_aggreg1991

aggreg2000 <- aggravated_assault %>% filter(data_year == 2000)
bar_aggreg2000 <- plot_region_counts(
  aggreg2000, "Distribution of Aggravated Assault By Region in 2000"
)
bar_aggreg2000

aggreg2010 <- aggravated_assault %>% filter(data_year == 2010)
bar_aggreg2010 <- plot_region_counts(
  aggreg2010, "Distribution of Aggravated Assault By Region in 2010"
)
bar_aggreg2010

aggreg2020 <- aggravated_assault %>% filter(data_year == 2020)
bar_aggreg2020 <- plot_region_counts(
  aggreg2020, "Distribution of Aggravated Assault By Region in 2020"
)
bar_aggreg2020

aggreg2022 <- aggravated_assault %>% filter(data_year == 2022)
bar_aggreg2022 <- plot_region_counts(
  aggreg2022, "Distribution of Aggravated Assault By Region in 2022"
)
bar_aggreg2022

# Bar chart of the number of rows per region in `incidents`, with the given
# plot title.
plot_region_counts <- function(incidents, title) {
  ggplot(incidents, aes(region_name)) +
    geom_bar(fill = "grey", color = "black") +
    labs(title = title, x = "Region", y = "Frequency") +
    theme_minimal()
}

# All sampled records whose offense is "Robbery". Both names are kept because
# each was defined separately before; they hold the same rows.
robb <- sampled_data %>%
  filter(offense_name == "Robbery")
robbery <- robb

# Histogram of Robbery records by year. The x-axis shows years, so it is
# labelled "Year" rather than with the offense name.
hist_robb <- ggplot(robb, aes(data_year)) +
  geom_histogram(binwidth = 1, fill = "grey", color = "black") +
  labs(title = "Distribution of Robbery", x = "Year", y = "Frequency") +
  theme_minimal()

hist_robb

# Yearly Robbery counts and their trend line.
robbery_per_year <- robbery %>%
  group_by(data_year) %>%
  summarize(robbery = n())

robbery_trend <- ggplot(robbery_per_year, aes(x = data_year, y = robbery)) +
  geom_line(color = "black") +
  geom_point(color = "blue") +
  labs(
    title = "Trend for Robbery over the Years",
    x = "Years",
    y = "Total Robbery"
  ) +
  theme_minimal()

robbery_trend

# Regional breakdown of Robbery for selected years.
robreg1991 <- robb %>% filter(data_year == 1991)
bar_robreg1991 <- plot_region_counts(
  robreg1991, "Distribution of Robbery By Region in 1991"
)
bar_robreg1991

robreg2000 <- robb %>% filter(data_year == 2000)
bar_robreg2000 <- plot_region_counts(
  robreg2000, "Distribution of Robbery By Region in 2000"
)
bar_robreg2000

robreg2010 <- robb %>% filter(data_year == 2010)
bar_robreg2010 <- plot_region_counts(
  robreg2010, "Distribution of Robbery By Region in 2010"
)
bar_robreg2010

robreg2020 <- robb %>% filter(data_year == 2020)
bar_robreg2020 <- plot_region_counts(
  robreg2020, "Distribution of Robbery By Region in 2020"
)
bar_robreg2020

robreg2022 <- robb %>% filter(data_year == 2022)
bar_robreg2022 <- plot_region_counts(
  robreg2022, "Distribution of Robbery By Region in 2022"
)
bar_robreg2022

# Bar chart of the number of rows per region in `incidents`, with the given
# plot title.
plot_region_counts <- function(incidents, title) {
  ggplot(incidents, aes(region_name)) +
    geom_bar(fill = "grey", color = "black") +
    labs(title = title, x = "Region", y = "Frequency") +
    theme_minimal()
}

# All sampled records whose offense is "Burglary". Both names are kept because
# each was defined separately before; they hold the same rows.
Burglary <- sampled_data %>%
  filter(offense_name == "Burglary")
burg <- Burglary

# Histogram of Burglary records by year. The x-axis shows years, so it is
# labelled "Year" rather than with the offense name.
hist_Burglary <- ggplot(Burglary, aes(data_year)) +
  geom_histogram(binwidth = 1, fill = "grey", color = "black") +
  labs(title = "Distribution of Burglary", x = "Year", y = "Frequency") +
  theme_minimal()

hist_Burglary

# Yearly Burglary counts and their trend line.
burg_per_year <- burg %>%
  group_by(data_year) %>%
  summarize(burg = n())

burg_trend <- ggplot(burg_per_year, aes(x = data_year, y = burg)) +
  geom_line(color = "black") +
  geom_point(color = "blue") +
  labs(
    title = "Trend for Burglary over the Years",
    x = "Years",
    y = "Total Burglary"
  ) +
  theme_minimal()

burg_trend

# Regional breakdown of Burglary for selected years.
# The plots are stored under bar_burreg* names. They were previously assigned
# to the Robbery variables (bar_robreg*), and the 2010 chart was stored as
# bar_robreg2000 while bar_robreg2010 (the Robbery chart) was the one printed.
burreg1991 <- Burglary %>% filter(data_year == 1991)
bar_burreg1991 <- plot_region_counts(
  burreg1991, "Distribution of Burglary By Region in 1991"
)
bar_burreg1991

burreg2000 <- Burglary %>% filter(data_year == 2000)
bar_burreg2000 <- plot_region_counts(
  burreg2000, "Distribution of Burglary By Region in 2000"
)
bar_burreg2000

burreg2010 <- Burglary %>% filter(data_year == 2010)
bar_burreg2010 <- plot_region_counts(
  burreg2010, "Distribution of Burglary By Region in 2010"
)
bar_burreg2010

burreg2020 <- Burglary %>% filter(data_year == 2020)
bar_burreg2020 <- plot_region_counts(
  burreg2020, "Distribution of Burglary By Region in 2020"
)
bar_burreg2020

burreg2022 <- Burglary %>% filter(data_year == 2022)
bar_burreg2022 <- plot_region_counts(
  burreg2022, "Distribution of Burglary By Region in 2022"
)
bar_burreg2022

# Bar chart of the number of rows per region in `incidents`, with the given
# plot title.
plot_region_counts <- function(incidents, title) {
  ggplot(incidents, aes(region_name)) +
    geom_bar(fill = "grey", color = "black") +
    labs(title = title, x = "Region", y = "Frequency") +
    theme_minimal()
}

# All sampled records whose offense is "Larceny". Both names are kept because
# each was defined separately before; they hold the same rows.
Larceny <- sampled_data %>%
  filter(offense_name == "Larceny")
larc <- Larceny

# Histogram of Larceny records by year. The x-axis shows years, so it is
# labelled "Year" rather than with the offense name.
hist_Larceny <- ggplot(Larceny, aes(data_year)) +
  geom_histogram(binwidth = 1, fill = "grey", color = "black") +
  labs(title = "Distribution of Larceny", x = "Year", y = "Frequency") +
  theme_minimal()

hist_Larceny

# Yearly Larceny counts and their trend line.
larc_per_year <- larc %>%
  group_by(data_year) %>%
  summarize(larc = n())

larc_trend <- ggplot(larc_per_year, aes(x = data_year, y = larc)) +
  geom_line(color = "black") +
  geom_point(color = "blue") +
  labs(
    title = "Trend for Larceny over the Years",
    x = "Years",
    y = "Total Larceny"
  ) +
  theme_minimal()

larc_trend

# Regional breakdown of Larceny for selected years (no 1991 chart here).
larreg2000 <- Larceny %>% filter(data_year == 2000)
bar_larreg2000 <- plot_region_counts(
  larreg2000, "Distribution of Larceny By Region in 2000"
)
bar_larreg2000

larreg2010 <- Larceny %>% filter(data_year == 2010)
bar_larreg2010 <- plot_region_counts(
  larreg2010, "Distribution of Larceny By Region in 2010"
)
bar_larreg2010

larreg2020 <- Larceny %>% filter(data_year == 2020)
bar_larreg2020 <- plot_region_counts(
  larreg2020, "Distribution of Larceny By Region in 2020"
)
bar_larreg2020

larreg2022 <- Larceny %>% filter(data_year == 2022)
bar_larreg2022 <- plot_region_counts(
  larreg2022, "Distribution of Larceny By Region in 2022"
)
bar_larreg2022

# Bar chart of the number of rows per region in `incidents`, with the given
# plot title.
plot_region_counts <- function(incidents, title) {
  ggplot(incidents, aes(region_name)) +
    geom_bar(fill = "grey", color = "black") +
    labs(title = title, x = "Region", y = "Frequency") +
    theme_minimal()
}

# All sampled records whose offense is "Damage&Int". Both names are kept
# because each was defined separately before; they hold the same rows.
DI <- sampled_data %>%
  filter(offense_name == "Damage&Int")
DamInt <- DI

# Histogram of Damage&Intimidation records by year. The x-axis shows years,
# so it is labelled "Year" rather than with the offense name.
hist_DI <- ggplot(DI, aes(data_year)) +
  geom_histogram(binwidth = 1, fill = "grey", color = "black") +
  labs(
    title = "Distribution of Damage&Intimidation",
    x = "Year",
    y = "Frequency"
  ) +
  theme_minimal()

hist_DI

# Yearly Damage&Intimidation counts and their trend line.
DamInt_per_year <- DamInt %>%
  group_by(data_year) %>%
  summarize(DamInt = n())

DamInt_trend <- ggplot(DamInt_per_year, aes(x = data_year, y = DamInt)) +
  geom_line(color = "black") +
  geom_point(color = "blue") +
  labs(
    title = "Trend for Damage&Intimidation over the Years",
    x = "Years",
    y = "Total Damage&Intimidation"
  ) +
  theme_minimal()

DamInt_trend

# Regional breakdown of Damage&Intimidation for selected years.
direg1991 <- DI %>% filter(data_year == 1991)
bar_direg1991 <- plot_region_counts(
  direg1991, "Distribution of Damage & Intimidation By Region in 1991"
)
bar_direg1991

direg2000 <- DI %>% filter(data_year == 2000)
bar_direg2000 <- plot_region_counts(
  direg2000, "Distribution of Damage & Intimidation By Region in 2000"
)
bar_direg2000

direg2010 <- DI %>% filter(data_year == 2010)
bar_direg2010 <- plot_region_counts(
  direg2010, "Distribution of Damage & Intimidation By Region in 2010"
)
bar_direg2010

direg2020 <- DI %>% filter(data_year == 2020)
bar_direg2020 <- plot_region_counts(
  direg2020, "Distribution of Damage & Intimidation By Region in 2020"
)
bar_direg2020

direg2022 <- DI %>% filter(data_year == 2022)
bar_direg2022 <- plot_region_counts(
  direg2022, "Distribution of Damage & Intimidation By Region in 2022"
)
bar_direg2022

# Histogram of "Arson" records by year, one bin per year.
# The subset is piped straight into ggplot() so that `Arson` only ever
# holds the plot, rather than first a data frame and then a plot.
Arson <- sampled_data %>%
  filter(offense_name == "Arson") %>%
  ggplot(aes(x = data_year)) +
  geom_histogram(binwidth = 1, fill = "grey", color = "black") +
  # The x aesthetic is data_year, so the axis is labelled as the year.
  labs(
    title = "Distribution of Arson",
    x = "Year",
    y = "Frequency"
  ) +
  theme_minimal()

Arson

# Yearly trend of "Arson" offenses: subset the sample, count the rows for
# each data_year, then draw a line with the yearly points overlaid.
ars <- filter(sampled_data, offense_name == "Arson")

ars_per_year <- summarise(
  group_by(ars, data_year),
  ars = n()
)

ars_trend <- ggplot(
  data = ars_per_year,
  mapping = aes(x = data_year, y = ars)
) +
  geom_line(colour = "black") +
  geom_point(colour = "blue") +
  labs(
    title = "Trend for Arson over the Years",
    x = "Years",
    y = "Total Arson"
  ) +
  theme_minimal()

ars_trend

# "Arson" offenses by region, one bar chart per selected year.
# Each section subsets the sample to a single year and counts rows per
# region_name with geom_bar().

# ---- 1991 ----
arsreg1991 <- filter(
  sampled_data,
  data_year == "1991",
  offense_name == "Arson"
)

bar_arsreg1991 <- ggplot(data = arsreg1991, mapping = aes(x = region_name)) +
  geom_bar(fill = "grey", colour = "black") +
  labs(
    title = "Distribution of Arson By Region in 1991",
    x = "Region",
    y = "Frequency"
  ) +
  theme_minimal()

bar_arsreg1991

# ---- 2000 ----
arsreg2000 <- filter(
  sampled_data,
  data_year == "2000",
  offense_name == "Arson"
)

bar_arsreg2000 <- ggplot(data = arsreg2000, mapping = aes(x = region_name)) +
  geom_bar(fill = "grey", colour = "black") +
  labs(
    title = "Distribution of Arson By Region in 2000",
    x = "Region",
    y = "Frequency"
  ) +
  theme_minimal()

bar_arsreg2000

# ---- 2010 ----
arsreg2010 <- filter(
  sampled_data,
  data_year == "2010",
  offense_name == "Arson"
)

bar_arsreg2010 <- ggplot(data = arsreg2010, mapping = aes(x = region_name)) +
  geom_bar(fill = "grey", colour = "black") +
  labs(
    title = "Distribution of Arson By Region in 2010",
    x = "Region",
    y = "Frequency"
  ) +
  theme_minimal()

bar_arsreg2010

# ---- 2020 ----
arsreg2020 <- filter(
  sampled_data,
  data_year == "2020",
  offense_name == "Arson"
)

bar_arsreg2020 <- ggplot(data = arsreg2020, mapping = aes(x = region_name)) +
  geom_bar(fill = "grey", colour = "black") +
  labs(
    title = "Distribution of Arson By Region in 2020",
    x = "Region",
    y = "Frequency"
  ) +
  theme_minimal()

bar_arsreg2020

# ---- 2022 ----
arsreg2022 <- filter(
  sampled_data,
  data_year == "2022",
  offense_name == "Arson"
)

bar_arsreg2022 <- ggplot(data = arsreg2022, mapping = aes(x = region_name)) +
  geom_bar(fill = "grey", colour = "black") +
  labs(
    title = "Distribution of Arson By Region in 2022",
    x = "Region",
    y = "Frequency"
  ) +
  theme_minimal()

bar_arsreg2022

# Histogram of "Drug" records by year, one bin per year.
# The subset is piped straight into ggplot() so that `drugs` only ever
# holds the plot, rather than first a data frame and then a plot.
drugs <- sampled_data %>%
  filter(offense_name == "Drug") %>%
  ggplot(aes(x = data_year)) +
  geom_histogram(binwidth = 1, fill = "grey", color = "black") +
  # The x aesthetic is data_year, so the axis is labelled as the year.
  labs(
    title = "Distribution of Drug",
    x = "Year",
    y = "Frequency"
  ) +
  theme_minimal()

drugs

# Yearly trend of "Drug" offenses: subset the sample, count the rows for
# each data_year, then draw a line with the yearly points overlaid.
dg <- filter(sampled_data, offense_name == "Drug")

dg_per_year <- summarise(
  group_by(dg, data_year),
  dg = n()
)

dg_trend <- ggplot(
  data = dg_per_year,
  mapping = aes(x = data_year, y = dg)
) +
  geom_line(colour = "black") +
  geom_point(colour = "blue") +
  labs(
    title = "Trend for Drug over the Years",
    x = "Years",
    y = "Total Drugs"
  ) +
  theme_minimal()

dg_trend

# "Drug" offenses by region, one bar chart per selected year.
# Each section subsets the sample to a single year and counts rows per
# region_name with geom_bar().

# ---- 2000 ----
drureg2000 <- filter(
  sampled_data,
  data_year == "2000",
  offense_name == "Drug"
)

bar_drureg2000 <- ggplot(data = drureg2000, mapping = aes(x = region_name)) +
  geom_bar(fill = "grey", colour = "black") +
  labs(
    title = "Distribution of Drugs By Region in 2000",
    x = "Region",
    y = "Frequency"
  ) +
  theme_minimal()

bar_drureg2000

# ---- 2010 ----
drureg2010 <- filter(
  sampled_data,
  data_year == "2010",
  offense_name == "Drug"
)

bar_drureg2010 <- ggplot(data = drureg2010, mapping = aes(x = region_name)) +
  geom_bar(fill = "grey", colour = "black") +
  labs(
    title = "Distribution of Drugs By Region in 2010",
    x = "Region",
    y = "Frequency"
  ) +
  theme_minimal()

bar_drureg2010

# ---- 2020 ----
drureg2020 <- filter(
  sampled_data,
  data_year == "2020",
  offense_name == "Drug"
)

bar_drureg2020 <- ggplot(data = drureg2020, mapping = aes(x = region_name)) +
  geom_bar(fill = "grey", colour = "black") +
  labs(
    title = "Distribution of Drugs By Region in 2020",
    x = "Region",
    y = "Frequency"
  ) +
  theme_minimal()

bar_drureg2020

# ---- 2022 ----
drureg2022 <- filter(
  sampled_data,
  data_year == "2022",
  offense_name == "Drug"
)

bar_drureg2022 <- ggplot(data = drureg2022, mapping = aes(x = region_name)) +
  geom_bar(fill = "grey", colour = "black") +
  labs(
    title = "Distribution of Drugs By Region in 2022",
    x = "Region",
    y = "Frequency"
  ) +
  theme_minimal()

bar_drureg2022

# Scatter plot of victim_count (y) against total_offender_count (x).
scatter1 <- ggplot(
  sampled_data,
  aes(x = total_offender_count, y = victim_count)
) +
  geom_point(fill = "grey", color = "black") +
  # Axis labels follow the mapped aesthetics: offenders on x, victims on y.
  # They were previously swapped.
  labs(
    title = "Relationship between Victims and Offenders",
    x = "Offender Count",
    y = "Victim Count"
  ) +
  theme_minimal()
scatter1

# Scatter plot of total_offender_count (y) against victim_count (x) with a
# straight-line (lm) fit drawn on top, without a confidence band.
# Replaces the previous `scatter1` object.
scatter1 <- ggplot(
  data = sampled_data,
  mapping = aes(x = victim_count, y = total_offender_count)
) +
  geom_point(fill = "grey", colour = "black") +
  geom_smooth(method = "lm", colour = "black", se = FALSE) +
  labs(
    title = "Relationship between Offenders and Victims",
    x = "Victim Count",
    y = "Offender Count"
  ) +
  theme_minimal()
scatter1
## `geom_smooth()` using formula = 'y ~ x'

# "Damage" offenses in 1991, 2000, 2010 and 2020: keep the matching rows,
# reduce them to the year and offense columns, and count rows per year.
dam_years <- sampled_data %>%
  filter(
    data_year %in% c("1991", "2000", "2010", "2020"),
    offense_name == "Damage"
  ) %>%
  select(data_year, offense_name)

bar_damyear <- ggplot(data = dam_years, mapping = aes(x = data_year)) +
  geom_bar() +
  labs(title = "Distribution of Damage") +
  theme_minimal()

bar_damyear

# "Intimidation" offenses in 1991, 2000, 2010 and 2020: keep the matching
# rows, reduce them to the year and offense columns, and count rows per year.
int_years <- sampled_data %>%
  filter(
    data_year %in% c("1991", "2000", "2010", "2020"),
    offense_name == "Intimidation"
  ) %>%
  select(data_year, offense_name)

bar_intyear <- ggplot(data = int_years, mapping = aes(x = data_year)) +
  geom_bar() +
  labs(title = "Distribution of Intimidation") +
  theme_minimal()

bar_intyear

# "Sim.Assault" offenses in 1991, 2000, 2010 and 2020: keep the matching
# rows, reduce them to the year and offense columns, and count rows per year.
sim_years <- sampled_data %>%
  filter(
    data_year %in% c("1991", "2000", "2010", "2020"),
    offense_name == "Sim.Assault"
  ) %>%
  select(data_year, offense_name)

bar_simyear <- ggplot(data = sim_years, mapping = aes(x = data_year)) +
  geom_bar() +
  labs(title = "Distribution of Simple Assault") +
  theme_minimal()

bar_simyear

# "Agg.Assault" offenses in 1991, 2000, 2010 and 2020: keep the matching
# rows, reduce them to the year and offense columns, and count rows per year.
agg_years <- sampled_data %>%
  filter(
    data_year %in% c("1991", "2000", "2010", "2020"),
    offense_name == "Agg.Assault"
  ) %>%
  select(data_year, offense_name)

# "Agg.Assault" is the short code this script assigns to
# "Aggravated Assault", so the title uses that name.
bar_aggyear <- ggplot(agg_years, aes(x = data_year)) +
  geom_bar() +
  labs(title = "Distribution of Aggravated Assault") +
  theme_minimal()

bar_aggyear

# "Robbery" offenses in 1991, 2000, 2010 and 2020: keep the matching rows,
# reduce them to the year and offense columns, and count rows per year.
rob_years <- sampled_data %>%
  filter(
    data_year %in% c("1991", "2000", "2010", "2020"),
    offense_name == "Robbery"
  ) %>%
  select(data_year, offense_name)

bar_robyear <- ggplot(data = rob_years, mapping = aes(x = data_year)) +
  geom_bar() +
  labs(title = "Distribution of Robbery") +
  theme_minimal()

bar_robyear

# "Burglary" offenses in 1991, 2000, 2010 and 2020: keep the matching rows,
# reduce them to the year and offense columns, and count rows per year.
burg_years <- sampled_data %>%
  filter(
    data_year %in% c("1991", "2000", "2010", "2020"),
    offense_name == "Burglary"
  ) %>%
  select(data_year, offense_name)

# Title spelling corrected ("Bulglary" -> "Burglary").
bar_burgyear <- ggplot(burg_years, aes(x = data_year)) +
  geom_bar() +
  labs(title = "Distribution of Burglary") +
  theme_minimal()

bar_burgyear

# "Larceny" offenses in 1991, 2000, 2010 and 2020: keep the matching rows,
# reduce them to the year and offense columns, and count rows per year.
# The subset and the chart are built and printed once.
larc_years <- sampled_data %>%
  filter(
    data_year %in% c("1991", "2000", "2010", "2020"),
    offense_name == "Larceny"
  ) %>%
  select(data_year, offense_name)

bar_larcyear <- ggplot(larc_years, aes(x = data_year)) +
  geom_bar() +
  labs(title = "Distribution of Larceny") +
  theme_minimal()

bar_larcyear

# "Damage&Int" offenses in 1991, 2000, 2010 and 2020: keep the matching
# rows, reduce them to the year and offense columns, and count rows per year.
# NOTE(review): this reuses the names `dam_years` and `bar_damyear`, so it
# replaces the objects built for the plain "Damage" chart earlier in the
# script.
dam_years <- sampled_data %>%
  filter(
    data_year %in% c("1991", "2000", "2010", "2020"),
    offense_name == "Damage&Int"
  ) %>%
  select(data_year, offense_name)

bar_damyear <- ggplot(data = dam_years, mapping = aes(x = data_year)) +
  geom_bar() +
  labs(title = "Distribution of Damage & Intimidation") +
  theme_minimal()

bar_damyear

# "Arson" offenses in 1991, 2000, 2010 and 2020: keep the matching rows,
# reduce them to the year and offense columns, and count rows per year.
ars_years <- sampled_data %>%
  filter(
    data_year %in% c("1991", "2000", "2010", "2020"),
    offense_name == "Arson"
  ) %>%
  select(data_year, offense_name)

bar_arsyear <- ggplot(data = ars_years, mapping = aes(x = data_year)) +
  geom_bar() +
  labs(title = "Distribution of Arson") +
  theme_minimal()

bar_arsyear

# "Drug" offenses in 1991, 2000, 2010 and 2020: keep the matching rows,
# reduce them to the year and offense columns, and count rows per year.
drug_years <- sampled_data %>%
  filter(
    data_year %in% c("1991", "2000", "2010", "2020"),
    offense_name == "Drug"
  ) %>%
  select(data_year, offense_name)

bar_drugyear <- ggplot(data = drug_years, mapping = aes(x = data_year)) +
  geom_bar() +
  labs(title = "Distribution of Drug") +
  theme_minimal()

bar_drugyear