#Loading Necessary Libraries
library(data.table)
library(ggplot2)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(ggpubr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(Metrics)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(forecast)
##
## Attaching package: 'forecast'
## The following object is masked from 'package:Metrics':
##
## accuracy
## The following object is masked from 'package:ggpubr':
##
## gghistogram
library(padr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#Data Preparation
Import data
Dataset <- fread(file = "/Users/divyakampalli/Desktop/DPA/finalproject/crimeanalysis.csv", header = T, sep = ",", na.strings = "")
Top 5 rows of the Dataset
head(Dataset)
Structure of the Dataset
str(Dataset)
## Classes 'data.table' and 'data.frame': 7946816 obs. of 22 variables:
## $ ID : int 5741943 25953 26038 13279676 13274752 1930689 13203321 13210088 13210004 13210062 ...
## $ Case Number : chr "HN549294" "JE240540" "JE279849" "JG507211" ...
## $ Date : chr "08/25/2007 09:22:18 AM" "05/24/2021 03:06:00 PM" "06/26/2021 09:24:00 AM" "11/09/2023 07:30:00 AM" ...
## $ Block : chr "074XX N ROGERS AVE" "020XX N LARAMIE AVE" "062XX N MC CORMICK RD" "019XX W BYRON ST" ...
## $ IUCR : chr "0560" "0110" "0110" "0620" ...
## $ Primary Type : chr "ASSAULT" "HOMICIDE" "HOMICIDE" "BURGLARY" ...
## $ Description : chr "SIMPLE" "FIRST DEGREE MURDER" "FIRST DEGREE MURDER" "UNLAWFUL ENTRY" ...
## $ Location Description: chr "OTHER" "STREET" "PARKING LOT" "APARTMENT" ...
## $ Arrest : logi FALSE TRUE TRUE FALSE FALSE TRUE ...
## $ Domestic : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ Beat : int 2422 2515 1711 1922 632 512 122 1225 333 1732 ...
## $ District : int 24 25 17 19 6 5 1 12 3 17 ...
## $ Ward : int 49 36 50 47 6 NA 42 27 7 30 ...
## $ Community Area : int 1 19 13 5 44 NA 32 28 43 21 ...
## $ FBI Code : chr "08A" "01A" "01A" "05" ...
## $ X Coordinate : int NA 1141387 1152781 1162518 1183071 NA 1174694 1160870 1190812 1151117 ...
## $ Y Coordinate : int NA 1913179 1941458 1925906 1847869 NA 1901831 1898642 1856743 1922554 ...
## $ Year : int 2007 2021 2021 2023 2023 2002 2023 2023 2023 2023 ...
## $ Updated On : chr "08/17/2015 03:03:40 PM" "11/18/2023 03:39:49 PM" "11/18/2023 03:39:49 PM" "11/18/2023 03:39:49 PM" ...
## $ Latitude : num NA 41.9 42 42 41.7 ...
## $ Longitude : num NA -87.8 -87.7 -87.7 -87.6 ...
## $ Location : chr NA "(41.917838056, -87.755968972)" "(41.995219444, -87.713354912)" "(41.952345086, -87.677975059)" ...
## - attr(*, ".internal.selfref")=<externalptr>
Summary of Dataset
summary(Dataset)
## ID Case Number Date Block
## Min. : 634 Length:7946816 Length:7946816 Length:7946816
## 1st Qu.: 3852320 Class :character Class :character Class :character
## Median : 7149126 Mode :character Mode :character Mode :character
## Mean : 7150988
## 3rd Qu.:10335352
## Max. :13292996
##
## IUCR Primary Type Description Location Description
## Length:7946816 Length:7946816 Length:7946816 Length:7946816
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Arrest Domestic Beat District Ward
## Mode :logical Mode :logical Min. : 111 Min. : 1.0 Min. : 1.0
## FALSE:5892780 FALSE:6581484 1st Qu.: 621 1st Qu.: 6.0 1st Qu.:10.0
## TRUE :2054036 TRUE :1365332 Median :1034 Median :10.0 Median :23.0
## Mean :1185 Mean :11.3 Mean :22.8
## 3rd Qu.:1731 3rd Qu.:17.0 3rd Qu.:34.0
## Max. :2535 Max. :31.0 Max. :50.0
## NA's :47 NA's :614854
## Community Area FBI Code X Coordinate Y Coordinate
## Min. : 0.0 Length:7946816 Min. : 0 Min. : 0
## 1st Qu.:23.0 Class :character 1st Qu.:1152998 1st Qu.:1859105
## Median :32.0 Mode :character Median :1166138 Median :1890771
## Mean :37.5 Mean :1164616 Mean :1885821
## 3rd Qu.:57.0 3rd Qu.:1176389 3rd Qu.:1909321
## Max. :77.0 Max. :1205119 Max. :1951622
## NA's :613478 NA's :87614 NA's :87614
## Year Updated On Latitude Longitude
## Min. :2001 Length:7946816 Min. :36.62 Min. :-91.69
## 1st Qu.:2005 Class :character 1st Qu.:41.77 1st Qu.:-87.71
## Median :2009 Mode :character Median :41.86 Median :-87.67
## Mean :2010 Mean :41.84 Mean :-87.67
## 3rd Qu.:2015 3rd Qu.:41.91 3rd Qu.:-87.63
## Max. :2023 Max. :42.02 Max. :-87.52
## NA's :87614 NA's :87614
## Location
## Length:7946816
## Class :character
## Mode :character
##
##
##
##
#Data Cleaning and Preprocessing
Extracting 5 past years’ data
CrimesDF <- Dataset[Year > 2018]
Renaming some of the variables
setnames(CrimesDF, c("Case Number", "Primary Type", "Location Description", "Community Area"), c("Case", "Type", "Locdescrip", "Community"))
Checking if there are any Duplicates
any(duplicated(CrimesDF[["Case"]]))
## [1] TRUE
Removing any duplicates in Case Number and testing again to check if there are any duplicates.
CrimesDF <- CrimesDF[!duplicated(CrimesDF[["Case"]])]
any(duplicated(CrimesDF[["Case"]]))
## [1] FALSE
Testing for missing values
any(is.na(CrimesDF))
## [1] TRUE
Finding the missing values in each coloumn.
colSums(is.na(CrimesDF))
## ID Case Date Block IUCR Type
## 0 0 0 0 0 0
## Description Locdescrip Arrest Domestic Beat District
## 0 6051 0 0 0 0
## Ward Community FBI Code X Coordinate Y Coordinate Year
## 48 2 0 16907 16907 0
## Updated On Latitude Longitude Location
## 0 16907 16907 16907
Replacing all NAs with similar values
CrimesDF$`Latitude` <- na.omit(CrimesDF$`Latitude`)[match(CrimesDF$`X Coordinate`, na.omit(CrimesDF$`X Coordinate`))]
colSums(is.na(CrimesDF))
## ID Case Date Block IUCR Type
## 0 0 0 0 0 0
## Description Locdescrip Arrest Domestic Beat District
## 0 6051 0 0 0 0
## Ward Community FBI Code X Coordinate Y Coordinate Year
## 48 2 0 16907 16907 0
## Updated On Latitude Longitude Location
## 0 16907 16907 16907
Removing NA in latitude, longitude, location, Case Number
CrimesDF <- CrimesDF[!is.na(CrimesDF[["Latitude"]])]
CrimesDF <- CrimesDF[!is.na(CrimesDF[["Case"]])]
colSums(is.na(CrimesDF))
## ID Case Date Block IUCR Type
## 0 0 0 0 0 0
## Description Locdescrip Arrest Domestic Beat District
## 0 4513 0 0 0 0
## Ward Community FBI Code X Coordinate Y Coordinate Year
## 47 1 0 0 0 0
## Updated On Latitude Longitude Location
## 0 0 0 0
Replacing all NAs with similar records
CrimesDF$`Locdescrip` <- na.omit(CrimesDF$`Locdescrip`)[match(CrimesDF$`Location`, na.omit(CrimesDF$`Location`))]
CrimesDF$`District` <- na.omit(CrimesDF$`District`)[match(CrimesDF$`Beat`, na.omit(CrimesDF$`Beat`))]
CrimesDF$`Ward` <- na.omit(CrimesDF$`Ward`)[match(CrimesDF$`Location`, na.omit(CrimesDF$`Location`))]
CrimesDF$`Community` <- na.omit(CrimesDF$`Community`)[match(CrimesDF$`Location`, na.omit(CrimesDF$`Location`))]
colSums(is.na(CrimesDF))
## ID Case Date Block IUCR Type
## 0 0 0 0 0 0
## Description Locdescrip Arrest Domestic Beat District
## 0 637 0 0 0 0
## Ward Community FBI Code X Coordinate Y Coordinate Year
## 5 1 0 0 0 0
## Updated On Latitude Longitude Location
## 0 0 0 0
CrimesDF <- CrimesDF[!is.na(CrimesDF[["Locdescrip"]])]
any(is.na(CrimesDF))
## [1] FALSE
CrimesDF <- CrimesDF[which(Community != 0),]
CrimesDF <- CrimesDF[, !c("ID", "IUCR", "Description", "FBI Code", "Block", "Ward", "X Coordinate", "Y Coordinate", "Updated On")]
CrimesDF[["Date"]] <- parse_date_time(CrimesDF[["Date"]], orders = "mdY IMSp")
Create four time intervals and Extract hours
tint <- c("0", "5.9", "11.9", "17.9", "23.9")
hours <- hour(CrimesDF[["Date"]])
CrimesDF[["Tint"]] <- cut(hours, breaks = tint, labels = c("0-5H", "6-11H", "12-17H", "18-24H"), include.lowest = T)
Create the column Day showing the weekday, month, season when the incident occurred
CrimesDF[["Day"]] <- wday(CrimesDF[["Date"]], label = T)
CrimesDF[["Month"]] <- month(CrimesDF[["Date"]], label = T)
quarters <- quarter(CrimesDF$Date)
sint <- c("0.9", "1.9", "2.9", "3.9", "4.9")
CrimesDF[["Season"]] <- cut(quarters, breaks = sint, labels = c("SPRING", "SUMMER", "FALL", "WINTER"))
Regrouping similar crimes into one type
CrimesDF[["Type"]] <- ifelse(CrimesDF[["Type"]] %in% c("CRIMINAL DAMAGE"), "DAMAGE",
ifelse(CrimesDF[["Type"]] %in% c("DECEPTIVE PRACTICE"), "DECEIVE",
ifelse(CrimesDF[["Type"]] %in% c("KIDNAPPING", "OFFENSE INVOLVING CHILDREN", "HUMAN TRAFFICKING"), "HUMANCHILD",
ifelse(CrimesDF[["Type"]] %in% c("NARCOTICS", "OTHER NARCOTIC VIOLATION"), "NARCOTICS",
ifelse(CrimesDF[["Type"]] %in% c("MOTOR VEHICLE THEFT"), "MOTO",
ifelse(CrimesDF[["Type"]] %in% c("OTHER OFFENSE"), "OTHER",
ifelse(CrimesDF[["Type"]] %in% c("CRIM SEXUAL ASSAULT", "PROSTITUTION", "SEX OFFENSE"), "SEX",
ifelse(CrimesDF[["Type"]] %in% c("GAMBLING", "INTERFERENCE WITH PUBLIC OFFICER", "INTIMIDATION", "LIQUOR LAW VIOLATION", "OBSCENITY", "PUBLIC INDECENCY", "PUBLIC PEACE VIOLATION", "STALKING", "NON-CRIMINAL", "NON-CRIMINAL (SUBJECT SPECIFIED)", "NON - CRIMINAL"), "SOCIETY",
ifelse(CrimesDF[["Type"]] %in% c("CRIMINAL TRESPASS"), "TRESPASS",
ifelse(CrimesDF[["Type"]] %in% c("CONCEALED CARRY LICENSE VIOLATION", "WEAPONS VIOLATION"), "WEAPONS", CrimesDF[["Type"]]))))))))))
CrimesDF[["Locdescrip"]] <- ifelse(CrimesDF[["Locdescrip"]] %in% c("VEHICLE-COMMERCIAL", "VEHICLE - DELIVERY TRUCK", "VEHICLE - OTHER RIDE SERVICE", "VEHICLE - OTHER RIDE SHARE SERVICE (E.G., UBER, LYFT)", "VEHICLE NON-COMMERCIAL", "TRAILER", "TRUCK", "DELIVERY TRUCK", "TAXICAB", "OTHER COMMERCIAL TRANSPORTATION"), "VEHICLE",
ifelse(CrimesDF[["Locdescrip"]] %in% c("BAR OR TAVERN", "TAVERN", "TAVERN/LIQUOR STORE"), "TAVERN",
ifelse(CrimesDF[["Locdescrip"]] %in% c("SCHOOL YARD", "SCHOOL, PRIVATE, BUILDING", "SCHOOL, PRIVATE, GROUNDS", "SCHOOL, PUBLIC, BUILDING", "SCHOOL, PUBLIC, GROUNDS", "COLLEGE/UNIVERSITY GROUNDS", "COLLEGE/UNIVERSITY RESIDENCE HALL"), "SCHOOL",
ifelse(CrimesDF[["Locdescrip"]] %in% c("RESIDENCE", "RESIDENCE-GARAGE", "RESIDENCE PORCH/HALLWAY", "RESIDENTIAL YARD (FRONT/BACK)", "DRIVEWAY - RESIDENTIAL", "GARAGE", "HOUSE", "PORCH", "YARD"), "RESIDENCE",
ifelse(CrimesDF[["Locdescrip"]] %in% c("PARKING LOT", "PARKING LOT/GARAGE(NON.RESID.)", "POLICE FACILITY/VEH PARKING LOT"), "PARKING",
ifelse(CrimesDF[["Locdescrip"]] %in% c("OTHER", "OTHER RAILROAD PROP / TRAIN DEPOT", "ABANDONED BUILDING", "ANIMAL HOSPITAL", "ATHLETIC CLUB", "BASEMENT", "BOAT/WATERCRAFT", "CHURCH", "CHURCH/SYNAGOGUE/PLACE OF WORSHIP", "COIN OPERATED MACHINE", "CONSTRUCTION SITE", "SEWER", "STAIRWELL", "VACANT LOT", "VACANT LOT/LAND", "VESTIBULE", "WOODED AREA", "FARM", "FACTORY", "FACTORY/MANUFACTURING BUILDING", "FEDERAL BUILDING", "FIRE STATION", "FOREST PRESERVE", "GOVERNMENT BUILDING", "GOVERNMENT BUILDING/PROPERTY", "JAIL / LOCK-UP FACILITY", "LIBRARY", "MOVIE HOUSE/THEATER", "POOL ROOM", "SPORTS ARENA/STADIUM", "WAREHOUSE", "AUTO", "AUTO / BOAT / RV DEALERSHIP", "CEMETARY"), "OTHERS",
ifelse(CrimesDF[["Locdescrip"]] %in% c("COMMERCIAL / BUSINESS OFFICE"), "BIGBUSINESS",
ifelse(CrimesDF[["Locdescrip"]] %in% c("PARK PROPERTY"), "PARK",
ifelse(CrimesDF[["Locdescrip"]] %in% c("ATM (AUTOMATIC TELLER MACHINE)", "BANK", "CREDIT UNION", "CURRENCY EXCHANGE", "SAVINGS AND LOAN"), "BANK",
ifelse(CrimesDF[["Locdescrip"]] %in% c("HOTEL", "HOTEL/MOTEL"), "HOTEL",
ifelse(CrimesDF[["Locdescrip"]] %in% c("HOSPITAL", "HOSPITAL BUILDING/GROUNDS", "DAY CARE CENTER", "NURSING HOME", "NURSING HOME/RETIREMENT HOME", "MEDICAL/DENTAL OFFICE"), "HEALTH",
ifelse(CrimesDF[["Locdescrip"]] %in% c("ALLEY", "BOWLING ALLEY"), "ALLEY",
ifelse(CrimesDF[["Locdescrip"]] %in% c("CHA APARTMENT", "CHA HALLWAY/STAIRWELL/ELEVATOR", "CHA PARKING LOT", "CHA PARKING LOT/GROUNDS"), "CHA",
ifelse(CrimesDF[["Locdescrip"]] %in% c("CTA BUS", "CTA BUS STOP", "CTA GARAGE / OTHER PROPERTY", "CTA PLATFORM", "CTA STATION", "CTA TRACKS - RIGHT OF WAY", "CTA TRAIN", "CTA \"\"L\"\" TRAIN"), "CTA",
ifelse(CrimesDF[["Locdescrip"]] %in% c("AIRPORT BUILDING NON-TERMINAL - NON-SECURE AREA", "AIRPORT BUILDING NON-TERMINAL - SECURE AREA", "AIRPORT EXTERIOR - NON-SECURE AREA", "AIRPORT EXTERIOR - SECURE AREA", "AIRPORT PARKING LOT", "AIRPORT TERMINAL LOWER LEVEL - NON-SECURE AREA", "AIRPORT TERMINAL LOWER LEVEL - SECURE AREA", "AIRPORT TERMINAL MEZZANINE - NON-SECURE AREA", "AIRPORT TERMINAL UPPER LEVEL - NON-SECURE AREA", "AIRPORT TERMINAL UPPER LEVEL - SECURE AREA", "AIRPORT TRANSPORTATION SYSTEM (ATS)", "AIRPORT VENDING ESTABLISHMENT", "AIRPORT/AIRCRAFT", "AIRCRAFT"), "AIRPORT",
ifelse(CrimesDF[["Locdescrip"]] %in% c("APPLIANCE STORE", "BARBERSHOP", "CAR WASH", "CLEANING STORE", "CONVENIENCE STORE", "DEPARTMENT STORE", "DRUG STORE", "GARAGE/AUTO REPAIR", "GAS STATION", "GAS STATION DRIVE/PROP.", "GROCERY FOOD STORE", "NEWSSTAND", "OFFICE", "PAWN SHOP", "RETAIL STORE", "SMALL RETAIL STORE"), "STORE",
ifelse(CrimesDF[["Locdescrip"]] %in% c("BRIDGE", "DRIVEWAY", "GANGWAY", "HIGHWAY/EXPRESSWAY", "LAKEFRONT/WATERFRONT/RIVERBANK", "SIDEWALK", "STREET", "HALLWAY"), "STREET",
CrimesDF[["Locdescrip"]])))))))))))))))))
Converting DataTable into Dataframe and normalising the values.
CrimesDF <- as.data.frame(CrimesDF)
CrimesDF <- CrimesDF[c("Case", "Date", "Year", "Month", "Day", "Season", "Tint", "Type", "Arrest", "Domestic", "Locdescrip", "Beat", "District", "Community", "Latitude", "Longitude", "Location")]
CrimesDF[, c("Beat", "Type", "District", "Community", "Month", "Day", "Locdescrip")] <- lapply(CrimesDF[, c("Beat", "Type", "District", "Community", "Month", "Day", "Locdescrip")], as.factor)
options(scipen=200)
Checking Number of Districts and Count of number of crimes in each District
length(unique(CrimesDF[["District"]]))
## [1] 22
table(CrimesDF[["District"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12 14
## 57356 53489 57288 65487 49891 72754 55644 70805 49563 51028 72007 61636 38451
## 15 16 17 18 19 20 22 24 25
## 42143 40576 31902 57244 54053 22159 37021 37759 58782
Highest number of crimes are in District 8
Checking Number of Commuinities and count of crimes in each Community
length(unique(CrimesDF[["Community"]]))
## [1] 77
table(CrimesDF[["Community"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## 18660 15983 17694 8474 5275 26294 16758 51818 1087 4851 4176 1973 3757
## 14 15 16 17 18 19 20 21 22 23 24 25 26
## 9835 12616 10923 6222 2532 19001 6076 8830 19511 33106 30946 63537 23829
## 27 28 29 30 31 32 33 34 35 36 37 38 39
## 19967 42053 36892 18467 11267 39542 10533 4779 13412 3577 3474 16155 7765
## 40 41 42 43 44 45 46 47 48 49 50 51 52
## 11257 9210 16225 41896 29853 5634 19785 1709 6342 28362 4851 8303 4852
## 53 54 55 56 57 58 59 60 61 62 63 64 65
## 17427 5943 2673 8032 3815 9291 3697 6162 18743 3734 9065 4130 6834
## 66 67 68 69 70 71 72 73 74 75 76 77
## 24192 26677 25579 30992 9085 32409 3776 13465 2106 8279 8036 12970
Highest number of crimes are in 25th community
crimes <- data.frame(Dataset) %>%
select(c(Date, Primary.Type)) %>%
mutate(Primary.Type = as.factor(Primary.Type),
Date = mdy_hms(Date),
Date = floor_date(Date, unit = "hours")) %>% #takes a date-time object and rounds it down to hours unit
arrange(Date)
crimes %>%
count(Primary.Type, sort = T) %>%
head(5) %>%
ggplot(aes(x = n, y = reorder(Primary.Type, n))) +
geom_col()
labs(title = 'Top 5 Crimes in Chicago',
x = 'Number of Crimes',
y = 'Crimes')
## $x
## [1] "Number of Crimes"
##
## $y
## [1] "Crimes"
##
## $title
## [1] "Top 5 Crimes in Chicago"
##
## attr(,"class")
## [1] "labels"
From the above plot, we can see that “THEFT” is the highest occuring crime. Let’s do Time series Analysis on THEFT.
#Time series analysis for Theft Create Prediction Time Frame We are taking only 5 years data so that it will be accurate
theft_crime <- crimes %>%
filter(Primary.Type == 'THEFT') %>%
group_by(Date) %>%
summarise(Theft = n()) %>%
filter(Date >= '2018-01-01' & Date <= '2022-12-31')
Printing range of dates, head and tail
head(theft_crime, 5)
tail(theft_crime, 5)
range(theft_crime$Date)
## [1] "2018-01-01 06:00:00 UTC" "2022-12-31 05:00:00 UTC"
range(theft_crime$Date)
## [1] "2018-01-01 06:00:00 UTC" "2022-12-31 05:00:00 UTC"
theft_crime <- theft_crime %>%
pad(start_val = ymd_hms("2018-01-01 00:00:00"), end_val = ymd_hms("2021-12-31 23:00:00")) %>%
replace(., is.na(.), 0)
## pad applied on the interval: hour
#Splitting dataset into Train and Test sets and create Time series object We will split the data into Train set and Test set and train our model.
theft_train <- head(theft_crime, nrow(theft_crime) - 365)
theft_test <- tail(theft_crime, 365)
To create a time-series model, we need to create a time-series object from our train data. Time-series object will be based on theft as it is the one that we are going to predict, we set the frequency to be 24 as it is total hour of reported crime for 1 day.
theft_ts <- ts(theft_crime$Theft, frequency = 24)
head(theft_crime, 5)
tail(theft_crime, 5)
Theft_plot <- theft_train %>%
ggplot(aes(x = Date, y = Theft)) +
geom_line(aes(color = "theft")) +
scale_x_datetime(name = "Date", date_breaks = "1 year") +
scale_y_continuous(breaks = seq(0, 400, 100)) +
theme_minimal() +
labs(title = "Chicago Theft Crime", subtitle = "2018 - 2022")
ggplotly(Theft_plot)
Now, we will use autoplot to see the trend and sesonality
theft_ts_ap <- theft_ts %>%
tail(365) %>%
decompose()
theft_ts_ap %>%
autoplot()
Upon examining the plot, it becomes evident that the trend still reveals certain patterns, resembling a seasonal nature. This suggests the existence of additional seasonality patterns that haven’t been captured by the current visualization. To address this, we aim to construct a Multi-Seasonal Time Series Object.
Create and Decompose MSTS Object
theft_multi <- msts(theft_crime$Theft, seasonal.periods = c(24, # Daily
24*7, # Weekly
24*30)) # Monthly
theft_multi_dec <- theft_multi %>%
mstl()
theft_multi_dec %>%
tail(365) %>%
autoplot()
From the plot above, we can see the trend of the Theft Crime is already going smooth. The Theft Crime trend itself is increasing in the last 365 days.
#Seasonality Analysis
# Decompose MSTS Object
#theft_multi_dec <- theft_multi %>%
# mstl()
#theft_multi_dec %>%
# tail(24*7*4*12) %>%
# autoplot()
# Decompose MSTS Object
theft_multi_dec <- theft_multi %>% mstl()
# Convert the decomposed object to a data frame
df_theft_multi <- as.data.frame(theft_multi_dec)
df_theft_multi = as.data.frame(theft_multi_dec)
Hourly Seasonality
HourlyPlot <- df_theft_multi %>%
mutate(day = theft_crime$Date) %>%
group_by(day) %>%
summarise(seasonal = sum(Seasonal24 + Seasonal168 + Seasonal720)) %>%
head(24*7) %>%
ggplot(aes(x = day, y = seasonal)) +
geom_point(col = "maroon") +
geom_line(col = "blue") +
theme_minimal()
HourlyPlot
From the above graph, we can see that thefts are occurring more during
midday and falling during night.
Daily Seasonality
DailyPlot <- df_theft_multi %>%
mutate(day = wday(theft_crime$Date, label = T)) %>%
group_by(day) %>%
summarise(seasonal = sum(Seasonal24 + Seasonal168 + Seasonal720)) %>%
ggplot(aes(x = day, y = seasonal)) +
geom_col() +
theme_minimal()
DailyPlot
From the Daily Seasonality Graph, we can see that theft count increases
from wednesday and reaches its peak on friday and will fall. The least
number of thefts are on sunday.
Monthly seasonality
MonthlyPlot <- df_theft_multi %>%
mutate(day = theft_crime$Date, month = month(theft_crime$Date, label = T)) %>%
group_by(month) %>%
summarise(seasonal = sum(Seasonal24 + Seasonal168 + Seasonal720)) %>%
head(24*30) %>%
ggplot(aes(x = month, y = seasonal)) +
geom_point() + geom_col() +
theme_minimal()
MonthlyPlot
From the above graph, we can see that the crimes increases from June and
are on peak on August and will fall from then. The lowest crimes occur
in March.
In summary We have successfully predicted the frequency of theft crimes based on our analysis. It is reasonable to conclude that theft crime will probably start to rise at 10 a.m., peak at 5 p.m. (after business hours), and then continue to rise until 12 a.m. More crimes occur on Fridays. The actual crime is more likely to occur between June and October.
Similarly, we can do Time series analysis for each crime.
#Analysis and Visualisation
Plotting Number of Crimes versus Year
# Detach plyr if it's loaded and not required
#if ("package:plyr" %in% search()) {
# detach("package:plyr", unload=TRUE)
#}
CrimesDF %>%
dplyr::group_by(Year) %>%
dplyr::summarise(Count = n()) %>%
ggplot(aes(x = Year, y = Count)) +
geom_line(colour = "grey") +
geom_point(colour = "grey") +
geom_bar(aes(x = Year, y = Count), stat = "identity", fill = "blue", width = 0.3, position=position_dodge(0.4)) +
labs(x = "Year", y = "Number of Crimes", title = "Evolution of Number of Crimes") +
geom_text(aes(x = Year, y = Count, label = Count), size = 3, vjust = -1, position = position_dodge(0.9)) +
theme_minimal() +
theme(axis.title.x=element_blank(), axis.title.y=element_blank())
The number of cases decreased from 2019 to 2020 and the trend increased after 2021.
By time intervals
TimeIntervalsPlot <- CrimesDF %>%
group_by(Tint) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Tint, y = Count)) +
geom_bar(aes(x = Tint, y = Count), stat = "identity", fill = "blue", width = 0.3, position=position_dodge(0.4)) +
labs(x = "Time intervals", y = "Number of crimes", title = "Evolution by time intervals") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 75,vjust = 1,hjust = 1)) +
theme(axis.title.x=element_blank()) +
theme(axis.title.y=element_blank())
print(TimeIntervalsPlot)
From the above plot, we can see that the most number of crimes are
happening from 12-5 and the trend decreases later.
By WeekDays
WeekDaysPlot <- CrimesDF %>%
group_by(Day) %>%
summarise(Count = n()) %>%
ggplot(aes(x = factor(Day, level = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")), y = Count)) +
geom_bar(stat = "identity", fill = "blue", width = 0.3, position = position_dodge(0.4)) +
labs(x = "Weekdays", y = "Number of crimes", title = "Evolution by weekdays") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 75, vjust = 1, hjust = 1)) +
theme(axis.title.x = element_blank()) +
theme(axis.title.y = element_blank())
print(WeekDaysPlot)
By the above graph we can see that the trend is almost the same across
all the days. There is a slight increase of crimes on fridays and
saturdays.
By Months
MonthPlot <- CrimesDF %>%
group_by(Month) %>%
summarise(Count = n()) %>%
ggplot(aes(x = factor(Month, level = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")), y = Count)) +
geom_bar(stat = "identity", fill = "blue", width = 0.3, position = position_dodge(0.4)) +
labs(x = "Months", y = "Number of crimes", title = "Evolution by months") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 75, vjust = 1, hjust = 1)) +
theme(axis.title.x = element_blank()) +
theme(axis.title.y = element_blank())
print(MonthPlot)
Crimes were more likely to happen in June to August and are less likely
to happen in December, February.
# By seasons
p4 <- CrimesDF %>%
group_by(Season) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Season, y = Count)) +
geom_bar(aes(x = factor(Season, level = c("SPRING", "SUMMER", "FALL", "WINTER")), y = Count), stat = "identity", fill = "blue", width = 0.3, position=position_dodge(0.4)) +
labs(x = "Seasons", y = "Number of crimes", title = "Evolution by seasons") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 75,vjust = 1,hjust = 1)) +
theme(axis.title.x=element_blank()) +
theme(axis.title.y=element_blank())
# Combine plots into one plot
print(p4)
Crimes are more likely to happen in fall and less likely to happen in
winter.
Top 5 most frequent Crime areas
top5_Places <- CrimesDF %>%
group_by(Locdescrip) %>%
summarise(Count = n()) %>%
arrange(desc(Count)) %>%
head(5) # Select the top 5 places
plot_top5_Places <- top5_Places %>%
ggplot(aes(x = reorder(Locdescrip, Count), y = Count)) +
geom_bar(stat = "identity", fill = "blue", width = 0.3, position = position_dodge(0.4)) +
labs(x = "Places", y = "Number of crimes", title = "Top 5 most frequent places") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 75, vjust = 1, hjust = 1)) +
theme(axis.title.x = element_blank()) +
theme(axis.title.y = element_blank())
plot_top5_Places
print(top5_Places)
## # A tibble: 5 × 2
## Locdescrip Count
## <fct> <int>
## 1 STREET 367187
## 2 APARTMENT 192348
## 3 RESIDENCE 187694
## 4 STORE 94047
## 5 OTHERS 29915
Street is the top place where crime can happen. Dont think your apartment/residence is safe. The next place where crimes could take place is apartment followed by residence.
Bottom 5 most frequent Crime areas
bottom5_places <- CrimesDF %>%
group_by(Locdescrip) %>%
summarise(Count = n()) %>%
arrange(Count) %>%
head(5) # Select the bottom 5 places
plot_bottom5_places <- bottom5_places %>%
ggplot(aes(x = reorder(Locdescrip, Count), y = Count)) +
geom_bar(stat = "identity", fill = "blue", width = 0.3, position = position_dodge(0.4)) +
labs(x = "Places", y = "Number of crimes", title = "Bottom 5 most frequent places") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 75, vjust = 1, hjust = 1)) +
theme(axis.title.x = element_blank()) +
theme(axis.title.y = element_blank())
plot_bottom5_places
print(bottom5_places)
## # A tibble: 5 × 2
## Locdescrip Count
## <fct> <int>
## 1 HORSE STABLE 1
## 2 PUBLIC GRAMMAR SCHOOL 1
## 3 RAILROAD PROPERTY 1
## 4 VEHICLE - COMMERCIAL: TROLLEY BUS 1
## 5 CLUB 2
Least amount of crimes happen in the above places.
shapefile_path <- "/Users/divyakampalli/Downloads/boundaries-communityareas/geo_export_e07c1c74-44b6-459c-98d9-e8c9587ea2b6.shp"
mapcomu <- st_read(shapefile_path)
## Reading layer `geo_export_e07c1c74-44b6-459c-98d9-e8c9587ea2b6' from data source `/Users/divyakampalli/Downloads/boundaries-communityareas/geo_export_e07c1c74-44b6-459c-98d9-e8c9587ea2b6.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 77 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## Geodetic CRS: WGS84(DD)
names(mapcomu)
## [1] "area" "area_num_1" "area_numbe" "comarea" "comarea_id"
## [6] "community" "perimeter" "shape_area" "shape_len" "geometry"
temp <- CrimesDF %>%
group_by(Community) %>%
summarise(Count = n())
temp2df <- left_join(st_as_sf(mapcomu), temp, by = c("area_numbe" = "Community"))
locplot <- ggplot(data = temp2df) +
geom_sf(aes(fill = Count), color = "black", size = 0.25) +
scale_fill_gradient(low = "white", high = "red") +
labs(title = "Number of crimes per community") +
theme_void() +
theme(legend.position = "bottom")
dfpolice <- fread(file = "/Users/divyakampalli/Downloads/Police_Stations_-_Map.csv", header = T, sep = ",", na.strings = "")
dfpolice$LOCATION <- gsub("[(*)]", "", dfpolice$LOCATION)
policeloc <- str_split_fixed(dfpolice$LOCATION, ", ", 2)
policeloc <- as.data.frame(policeloc)
colnames(policeloc) <- c("lat", "long")
policeloc$lat <- as.numeric(as.character(policeloc$lat))
policeloc$long <- as.numeric(as.character(policeloc$long))
policeloc$id <- dfpolice$DISTRICT
locplot <- locplot +
geom_point(data = policeloc, aes(x = long, y = lat), size = 1, shape = 24, fill = "black")
# Display the final plot
locplot
From the above plot, we can see that Austin has highest number of
crimes.
# Types and number of crimes
p1 <- CrimesDF %>%
group_by(Type) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Type, y = Count)) +
geom_bar(aes(x = reorder(Type, Count), y = Count), stat = "identity", fill = "#6495ED", width = 0.3, position=position_dodge(0.4)) +
coord_flip() +
labs(x = "Number of crimes", y = "Type", title = "Evolution of number of crimes for different types") +
theme_minimal() +
theme(axis.title.x=element_blank()) +
theme(axis.title.y=element_blank())
p1
# Evolution over years
p2 <- CrimesDF %>%
group_by(Year, Type) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Year, y = Count, fill = Type)) +
geom_area() +
labs(x = "Years", y = "Number of crimes", title = "Evolution of crime types over years")
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
p2
# Get unique crime types
crime_types <- unique(CrimesDF$Type)
# Create a list to store individual plots
plots_list <- list()
# Loop through each crime type and create a plot
for (crime_type in crime_types) {
plot_data <- CrimesDF %>%
filter(Type == crime_type) %>%
group_by(Year) %>%
summarise(Count = n())
# Create a plot for the current crime type
current_plot <- ggplot(plot_data, aes(x = Year, y = Count)) +
geom_smooth(method = "lm") +
geom_point() +
labs(x = "Years", y = "Number of crimes", title = paste("Evolution of", crime_type, "over years"))
# Add the plot to the list
plots_list[[crime_type]] <- current_plot
}
# Print and view each plot
for (crime_type in crime_types) {
print(plots_list[[crime_type]])
}
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Transform the type
CrimesDF[, c("Month", "Day", "Season", "Tint")] <- lapply(CrimesDF[, c("Month", "Day", "Season", "Tint")], as.character)
# By time intervals
p1 <- CrimesDF %>%
group_by(Type, Tint) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Tint, y = reorder(Type, Count))) +
geom_tile(aes(fill = Count)) +
scale_x_discrete("Time intervals", expand = c(0, 0), position = "top") +
scale_y_discrete("Crime types", expand = c(0, -2)) +
scale_fill_gradient("Number of crimes", low = "white", high = "red") +
ggtitle("Evolution by time intervals") +
theme_bw() +
theme(panel.grid.major =element_line(colour = NA), panel.grid.minor = element_line(colour = NA))
## `summarise()` has grouped output by 'Type'. You can override using the
## `.groups` argument.
print(p1)
# By weekdays
p2 <- CrimesDF %>%
group_by(Type, Day) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Day, y = reorder(Type, Count))) +
geom_tile(aes(fill = Count)) +
scale_x_discrete("Weekdays", expand = c(0, 0), position = "top") +
scale_y_discrete("Crime types", expand = c(0, -2)) +
scale_fill_gradient("Number of crimes", low = "white", high = "red") +
ggtitle("Evolution by weekdays") +
theme_bw() +
theme(panel.grid.major =element_line(colour = NA), panel.grid.minor = element_line(colour = NA))
## `summarise()` has grouped output by 'Type'. You can override using the
## `.groups` argument.
print(p2)
# By months
p3 <- CrimesDF %>%
group_by(Type, Month) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Month, y = reorder(Type, Count))) +
geom_tile(aes(fill = Count)) +
scale_x_discrete("Months", expand = c(0, 0), position = "top") +
scale_y_discrete("Crime types", expand = c(0, -2)) +
scale_fill_gradient("Number of crimes", low = "white", high = "red") +
ggtitle("Evolution by months") +
theme_bw() +
theme(panel.grid.major =element_line(colour = NA), panel.grid.minor = element_line(colour = NA))
## `summarise()` has grouped output by 'Type'. You can override using the
## `.groups` argument.
print(p3)
# By seasons
p4 <- CrimesDF %>%
group_by(Type, Season) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Season, y = reorder(Type, Count))) +
geom_tile(aes(fill = Count)) +
scale_x_discrete("Seasons", expand = c(0, 0), position = "top") +
scale_y_discrete("Crime types", expand = c(0, -2)) +
scale_fill_gradient("Number of crimes", low = "white", high = "red") +
ggtitle("Evolution by seasons") +
theme_bw() +
theme(panel.grid.major =element_line(colour = NA), panel.grid.minor = element_line(colour = NA))
## `summarise()` has grouped output by 'Type'. You can override using the
## `.groups` argument.
print(p4)
# Find top10 most frequent places
top10P <- head(names(sort(table(CrimesDF$Locdescrip), decreasing = TRUE)), 10)
# Find top10 most frequent crime types
top10T <- head(names(sort(table(CrimesDF$Type), decreasing = TRUE)), 10)
# Plot
filter(CrimesDF, Locdescrip %in% top10P) %>%
filter(Type %in% top10T) %>%
group_by(Type, Locdescrip) %>%
summarise(Count = n()) %>%
ggplot(aes(x = reorder(Locdescrip, Count), y = reorder(Type, Count))) +
geom_tile(aes(fill = Count)) +
scale_x_discrete("Places", expand = c(0, 0), position = "top") +
scale_y_discrete("Crime types", expand = c(0, -2)) +
scale_fill_gradient("Number of crimes", low = "white", high = "red") +
ggtitle("Evolution by places") +
theme_bw() +
theme(
panel.grid.major = element_line(colour = NA),
panel.grid.minor = element_line(colour = NA),
axis.text.x = element_text(angle = 45, vjust = 0.1, hjust = 0.1) # Diagonal X-axis labels
)
## `summarise()` has grouped output by 'Type'. You can override using the
## `.groups` argument.
# Find top10 most dangerous community areas
top10C <- head(names((sort(table(CrimesDF$Community), decreasing = TRUE))), 10)
# Plot
filter(CrimesDF, Type %in% top10T) %>%
filter(Community %in% top10C) %>%
group_by(Type, Community) %>%
summarise(Count = n()) %>%
ggplot(aes(x = reorder(Community, Count), y = reorder(Type, Count))) +
geom_tile(aes(fill = Count)) +
scale_x_discrete("Community areas", expand = c(0, 0), position = "top") +
scale_y_discrete("Crime types", expand = c(0, -2)) +
scale_fill_gradient("Number of crimes", low = "white", high = "red") +
ggtitle("Evolution by areas") +
theme_bw() +
theme(panel.grid.major =element_line(colour = NA), panel.grid.minor = element_line(colour = NA))
## `summarise()` has grouped output by 'Type'. You can override using the
## `.groups` argument.
# Numbers
CrimesDF %>%
filter(Domestic == T) %>%
group_by(Year) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Year, y = Count)) +
geom_bar(aes(x = Year, y = Count), stat = "identity", fill = "#6495ED", width = 0.3, position=position_dodge(0.4)) +
labs(x = "Number of crimes", y = "Year", title = "Evolution of number of domestic crimes in different years") +
theme_minimal() +
theme(axis.title.x=element_blank()) +
theme(axis.title.y=element_blank())
# By time intervals
p1 <- CrimesDF %>%
filter(Domestic == T) %>%
group_by(Tint) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Tint, y = Count)) +
geom_bar(aes(x = Tint, y = Count), stat = "identity", fill = "#6495ED", width = 0.3, position=position_dodge(0.4)) +
labs(x = "Time intervals", y = "Number of domestic crimes", title = "Evolution by time intervals") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 75,vjust = 1,hjust = 1)) +
theme(axis.title.x=element_blank()) +
theme(axis.title.y=element_blank())
# By weekdays
p2 <- CrimesDF %>%
filter(Domestic == T) %>%
group_by(Day) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Day, y = Count)) +
geom_bar(aes(x = factor(Day, level = c("lun\\.", "mar\\.", "mer\\.", "jeu\\.", "ven\\.", "sam\\.", "dim\\.")), y = Count), stat = "identity", fill = "#6495ED", width = 0.3, position=position_dodge(0.4)) +
labs(x = "Weekdays", y = "Number of domestic crimes", title = "Evolution by weekdays") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 75,vjust = 1,hjust = 1)) +
theme(axis.title.x=element_blank()) +
theme(axis.title.y=element_blank())
# By months
p3 <- CrimesDF %>%
filter(Domestic == T) %>%
group_by(Month) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Month, y = Count)) +
geom_bar(aes(x = factor(Month, level = c("janv\\.", "févr\\.", "mars", "avr\\.", "mai", "juin", "juil\\.", "août", "sept\\.", "oct\\.", "nov\\.", "déc\\.")), y = Count), stat = "identity", fill = "#6495ED", width = 0.3, position=position_dodge(0.4)) +
labs(x = "Months", y = "Number of domestic crimes", title = "Evolution by months") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 75,vjust = 1,hjust = 1)) +
theme(axis.title.x=element_blank()) +
theme(axis.title.y=element_blank())
# By seasons
p4 <- CrimesDF %>%
filter(Domestic == T) %>%
group_by(Season) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Season, y = Count)) +
geom_bar(aes(x = factor(Season, level = c("SPRING", "SUMMER", "FALL", "WINTER")), y = Count), stat = "identity", fill = "#6495ED", width = 0.3, position=position_dodge(0.4)) +
labs(x = "Seasons", y = "Number of domestic crimes", title = "Evolution by seasons") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 75,vjust = 1,hjust = 1)) +
theme(axis.title.x=element_blank()) +
theme(axis.title.y=element_blank())
p1
p2
p3
p4
# Locations
#CrimesDF %>%
# filter(Domestic == T) %>%
# group_by(Locdescrip) %>%
# summarise(Count = n()) %>%
# ggplot(aes(x = Locdescrip, y = Count)) +
# geom_bar(aes(x = reorder(Locdescrip, Count), y = Count), stat = "identity", fill = "#6495ED", width = 0.3, position=position_dodge(0.4)) +
# labs(x = "Places", y = "Number of crimes", title = "Evolution by places") +
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 75,vjust = 1,hjust = 1)) +
# theme(axis.title.x=element_blank()) +
# theme(axis.title.y=element_blank())
# Extract data
temp <- CrimesDF %>%
filter(Arrest == T) %>%
group_by(Year) %>%
summarise(Count = n())
# Compute the crime rates
temp$rate <- lapply(temp$Count, function(x) x / nrow(CrimesDF))
temp$rate <- as.numeric(temp$rate)
# Plot
ggplot(temp, aes(x = Year, y = rate)) +
geom_line() +
theme_minimal() +
theme(axis.title.x=element_blank()) +
theme(axis.title.y=element_blank())
# Find top10 most dangerous community areas
top10C <- head(names((sort(table(CrimesDF$Community), decreasing = TRUE))), 10)
# Iterate through each community area
for (community_area in top10C) {
# Filter data for the current community area
community_data <- filter(CrimesDF, Community == community_area)
# Create a plot for the current community area
plot <- community_data %>%
group_by(Year) %>%
summarise(Count = n()) %>%
ggplot(aes(x = Year, y = Count)) +
geom_smooth(method = "lm") +
geom_point() +
labs(x = "Years", y = "Number of crimes", title = paste("Evolution of crimes in", community_area, "over years"))
# Display the plot
print(plot)
}
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Extract data
temp <- CrimesDF %>%
filter(Arrest == TRUE, Community %in% top10C) %>%
group_by(Year, Community) %>%
summarise(Count = n())
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
# Compute the crime rates
temp$rate <- temp$Count / nrow(CrimesDF)
# Iterate through each community area
for (community_area in unique(temp$Community)) {
# Filter data for the current community area
community_data <- filter(temp, Community == community_area)
# Create a plot for the current community area
plot <- ggplot(community_data, aes(x = Year, y = rate)) +
geom_line() +
labs(x = "Years", y = "Crime rates", title = paste("Evolution of arrested crime rates in Community - ", community_area, "over years"))
# Display the plot
print(plot)
}
# Extract data
temp <- filter(CrimesDF, Arrest == T) %>%
group_by(Year, Type) %>%
summarise(Count = n())
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
# Compute the crime rates
temp$rate <- lapply(temp$Count, function(x) x / nrow(CrimesDF))
temp$rate <- as.numeric(temp$rate)
# Plot
ggplot(temp, aes(x = Year, y = rate, colour = Type)) +
geom_line()
There is a steady decrease in the number of crimes even in the most dangerous communities. But there was also significant reduction in the arrest rate. This shows the police inefficiency. In conclusion, the arrest rate is very low for the amount of crimes and this shows that the Chicago police work on it along with keeping the community safe.