This report analyzes crime data in Colchester alongside climate data to explore potential correlations and trends. The analysis includes data visualization, time-series trends, and spatial mapping.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(ggplot2)
library(leaflet)
library(plotly)
## Warning: package 'plotly' was built under R version 4.4.3
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(corrplot)
## corrplot 0.95 loaded
# Load the raw crime and weather CSV exports, keeping only the columns that
# the analysis below refers to.
crime <- read.csv("C:/Users/Kush/Downloads/MA304 Assignment/crime24.csv") %>%
  select(category, date, lat, long, street_name, outcome_status)
temp <- read.csv("C:/Users/Kush/Downloads/MA304 Assignment/temp24.csv") %>%
  select(
    Date, TemperatureCAvg, TemperatureCMax, TemperatureCMin,
    Precmm, WindkmhInt, HrAvg
  )

# Missing values: weather rows are dropped when any selected column is NA;
# crime rows are dropped only when a coordinate is NA.
temp <- drop_na(temp)
crime <- filter(crime, !is.na(lat), !is.na(long))
CRIME
Two-way table
# Cross-tabulate crime category (rows) against outcome status (columns).
# NOTE(review): table() leaves NA values out by default, so records with a
# missing outcome_status are not counted -- confirm that is intended.
crime_table <- table(crime[["category"]], crime[["outcome_status"]])
# Show the contingency table
crime_table
##
## Action to be taken by another organisation
## anti-social-behaviour 0
## bicycle-theft 3
## burglary 1
## criminal-damage-arson 6
## drugs 2
## other-crime 4
## other-theft 1
## possession-of-weapons 2
## public-order 8
## robbery 0
## shoplifting 5
## theft-from-the-person 2
## vehicle-crime 1
## violent-crime 84
##
## Awaiting court outcome Court result unavailable
## anti-social-behaviour 0 0
## bicycle-theft 3 2
## burglary 10 6
## criminal-damage-arson 28 34
## drugs 13 19
## other-crime 7 12
## other-theft 6 5
## possession-of-weapons 8 9
## public-order 20 20
## robbery 7 4
## shoplifting 82 65
## theft-from-the-person 2 0
## vehicle-crime 7 6
## violent-crime 96 107
##
## Formal action is not in the public interest
## anti-social-behaviour 0
## bicycle-theft 0
## burglary 0
## criminal-damage-arson 0
## drugs 7
## other-crime 2
## other-theft 2
## possession-of-weapons 0
## public-order 11
## robbery 1
## shoplifting 2
## theft-from-the-person 0
## vehicle-crime 0
## violent-crime 12
##
## Further action is not in the public interest
## anti-social-behaviour 0
## bicycle-theft 0
## burglary 0
## criminal-damage-arson 1
## drugs 4
## other-crime 1
## other-theft 0
## possession-of-weapons 1
## public-order 0
## robbery 0
## shoplifting 0
## theft-from-the-person 0
## vehicle-crime 0
## violent-crime 7
##
## Further investigation is not in the public interest
## anti-social-behaviour 0
## bicycle-theft 0
## burglary 0
## criminal-damage-arson 0
## drugs 0
## other-crime 1
## other-theft 0
## possession-of-weapons 0
## public-order 0
## robbery 0
## shoplifting 0
## theft-from-the-person 0
## vehicle-crime 0
## violent-crime 1
##
## Investigation complete; no suspect identified
## anti-social-behaviour 0
## bicycle-theft 113
## burglary 108
## criminal-damage-arson 246
## drugs 18
## other-crime 14
## other-theft 283
## possession-of-weapons 6
## public-order 165
## robbery 41
## shoplifting 313
## theft-from-the-person 66
## vehicle-crime 208
## violent-crime 446
##
## Local resolution Offender given a caution
## anti-social-behaviour 0 0
## bicycle-theft 0 0
## burglary 0 0
## criminal-damage-arson 9 11
## drugs 118 16
## other-crime 1 1
## other-theft 1 0
## possession-of-weapons 4 4
## public-order 12 3
## robbery 0 0
## shoplifting 25 1
## theft-from-the-person 0 0
## vehicle-crime 0 0
## violent-crime 31 20
##
## Status update unavailable
## anti-social-behaviour 0
## bicycle-theft 2
## burglary 7
## criminal-damage-arson 10
## drugs 9
## other-crime 5
## other-theft 12
## possession-of-weapons 8
## public-order 27
## robbery 4
## shoplifting 7
## theft-from-the-person 2
## vehicle-crime 3
## violent-crime 141
##
## Suspect charged as part of another case
## anti-social-behaviour 0
## bicycle-theft 0
## burglary 0
## criminal-damage-arson 0
## drugs 0
## other-crime 0
## other-theft 1
## possession-of-weapons 0
## public-order 0
## robbery 0
## shoplifting 0
## theft-from-the-person 0
## vehicle-crime 0
## violent-crime 0
##
## Unable to prosecute suspect Under investigation
## anti-social-behaviour 0 0
## bicycle-theft 16 10
## burglary 26 13
## criminal-damage-arson 110 24
## drugs 16 43
## other-crime 43 9
## other-theft 74 27
## possession-of-weapons 16 7
## public-order 160 32
## robbery 25 3
## shoplifting 92 37
## theft-from-the-person 14 5
## vehicle-crime 33 12
## violent-crime 1195 280
# Bar chart: number of recorded crimes per category.
# coord_flip() turns the bars horizontal so the category names sit on the
# vertical axis.
crime %>%
  ggplot(aes(x = category)) +
  geom_bar(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Crime Frequency by Category",
    x = "Crime Category",
    y = "Count"
  ) +
  theme_minimal()
Pie Chart
# Pie chart: share of recorded crimes per category.
# A single stacked bar (one segment per category) is wrapped into a circle by
# mapping the y axis onto the polar angle.
crime_pie <- count(crime, category)
ggplot(crime_pie, aes(x = "", y = n, fill = category)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y", start = 0) +
  labs(title = "Crime Distribution by Category") +
  theme_void()
Dot plot
# Dot plot: number of recorded crimes per category.
# geom_dotplot() draws one dot per observation and its stacking axis has no
# meaningful scale, so it cannot back an axis labelled "Count". Tally each
# category first and draw a single dot per category at its count instead.
crime %>%
  count(category, name = "n_crimes") %>%
  ggplot(aes(x = category, y = n_crimes)) +
  geom_point(shape = 21, size = 3, fill = "steelblue", color = "black") +
  theme_minimal() +
  labs(
    title = "Dot Plot: Crime Frequency by Category",
    x = "Crime Category",
    y = "Count"
  )
Histogram
# Crime frequency by date: one bar per distinct value of `date`.
# geom_bar() counts rows per x value by default. Using
# geom_histogram(stat = "count") draws the same bars but warns that
# `binwidth`, `bins` and `pad` are ignored, so geom_bar() is used directly.
ggplot(crime, aes(x = date)) +
  geom_bar(fill = "steelblue", color = "black") +
  theme_minimal() +
  labs(title = "Crime Frequency by Date", x = "Date", y = "Count")
## Warning in geom_histogram(stat = "count", fill = "steelblue", color = "black"):
## Ignoring unknown parameters: `binwidth`, `bins`, and `pad`
Box Plot
# Box plot: distribution of incident latitude within each crime category.
crime %>%
  ggplot(aes(x = category, y = lat)) +
  geom_boxplot(fill = "red", alpha = 0.6) +
  labs(
    title = "Box Plot: Crime by Category",
    x = "Crime Category",
    y = "Latitude"
  ) +
  theme_minimal()
Violin plot & Scatter plot
# Scatter plot: crime locations, longitude on x and latitude on y.
# Semi-transparent points let dense clusters show up as darker areas.
crime %>%
  ggplot(aes(x = long, y = lat)) +
  geom_point(color = "blue", alpha = 0.5) +
  labs(
    title = "Crime Location: Latitude vs. Longitude",
    x = "Longitude",
    y = "Latitude"
  ) +
  theme_minimal()

# Violin plot: distribution of incident latitude for each outcome status.
crime %>%
  ggplot(aes(x = outcome_status, y = lat)) +
  geom_violin(fill = "steelblue", alpha = 0.6) +
  labs(
    title = "Violin Plot: Crime Outcome Status",
    x = "Outcome Status",
    y = "Latitude"
  ) +
  theme_minimal()
## Warning: Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
Correlation
# Correlation analysis of the crime coordinates.
# Build the 2 x 2 correlation matrix of latitude and longitude, using only
# rows where both values are present ("complete.obs").
cor_matrix <- crime %>%
  select(lat, long) %>%
  cor(use = "complete.obs")
# Draw the matrix with one circle per cell
corrplot(cor_matrix, method = "circle")
Spatial Analysis: Mapping Crime Locations
# Spatial analysis: interactive map of crime locations.
# Each record becomes a small, semi-transparent red circle marker drawn on
# top of the default tile layer.
crime %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~long,
    lat = ~lat,
    radius = 2,
    color = "red",
    opacity = 0.5
  )
TEMPERATURE
One-way frequency table
# Frequency table of daily average temperature bands
# Reload the raw weather data so the Date column can be parsed from scratch
temp <- read.csv("C:/Users/Kush/Downloads/MA304 Assignment/temp24.csv")
# Convert the date column to Date; values that do not match YYYY-MM-DD become
# NA and are removed by drop_na() below
temp$Date <- as.Date(temp$Date, format = "%Y-%m-%d")
# Keep the two columns used from here on and drop rows with missing values
temp <- temp %>%
  select(Date, TemperatureCAvg) %>%
  drop_na()
# Band the average temperature: Low (< 10), Medium (10 to 20 inclusive),
# High (> 20). Storing the band as a factor with explicit levels keeps tables
# and plots in Low -> Medium -> High order rather than alphabetical order.
temp <- temp %>%
  mutate(TempCategory = factor(
    case_when(
      TemperatureCAvg < 10 ~ "Low",
      TemperatureCAvg >= 10 & TemperatureCAvg <= 20 ~ "Medium",
      TemperatureCAvg > 20 ~ "High"
    ),
    levels = c("Low", "Medium", "High")
  ))
# One-way frequency table: number of days in each temperature band
temp_table <- table(temp$TempCategory)
# Display the table
temp_table
##
## High Low Medium
## 13 155 198
# Bar chart: number of days falling in each temperature category.
temp %>%
  ggplot(aes(x = TempCategory)) +
  geom_bar(fill = "steelblue") +
  labs(
    title = "Temperature Frequency by Category",
    x = "Temperature Category",
    y = "Count"
  ) +
  theme_minimal()
Histogram
# Histogram: distribution of the average temperature in 1-degree bins.
temp %>%
  ggplot(aes(x = TemperatureCAvg)) +
  geom_histogram(binwidth = 1, fill = "blue", alpha = 0.7) +
  labs(
    title = "Temperature Distribution",
    x = "Average Temperature (°C)",
    y = "Frequency"
  ) +
  theme_minimal()

# Density plot: smoothed view of the same distribution.
temp %>%
  ggplot(aes(x = TemperatureCAvg)) +
  geom_density(fill = "blue", alpha = 0.5) +
  labs(
    title = "Density Plot: Temperature",
    x = "Average Temperature (°C)",
    y = "Density"
  ) +
  theme_minimal()
Box Plot
# Box plot: a single box summarising the average temperature values.
temp %>%
  ggplot(aes(y = TemperatureCAvg)) +
  geom_boxplot(fill = "red", alpha = 0.6) +
  labs(title = "Box Plot: Temperature", y = "Temperature (°C)") +
  theme_minimal()
Time series Plot
# Time series plot: average temperature over time (red line) with a LOESS
# trend line (blue).
# The smoothing formula is given explicitly; when it is omitted geom_smooth()
# falls back to the same y ~ x but prints a message on every render.
ggplot(temp, aes(x = Date, y = TemperatureCAvg)) +
  geom_line(color = "red") +
  geom_smooth(method = "loess", formula = y ~ x, color = "blue") +
  theme_minimal() +
  labs(
    title = "Average Temperature Trends",
    x = "Date",
    y = "Temperature (°C)"
  )
## `geom_smooth()` using formula = 'y ~ x'
Interactive bar plot
# Build the static bar chart of days per temperature category first ...
temp_interactive_plot <- temp %>%
  ggplot(aes(x = TempCategory)) +
  geom_bar(fill = "steelblue") +
  labs(
    title = "Interactive Temperature Frequency by Category",
    x = "Temperature Category",
    y = "Count"
  ) +
  theme_minimal()
# ... then hand it to plotly, which renders it as an interactive widget
ggplotly(temp_interactive_plot)
# Load necessary libraries
library(ggplot2)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(corrplot)
# Correlation heatmap of the numeric weather variables
# Read the dataset (update the filename accordingly)
data <- read.csv("C:/Users/Kush/Downloads/MA304 Assignment/temp24.csv")
# Remove rows with NA values before calculating correlation.
# NOTE(review): na.omit() drops a row when ANY column is missing; if some
# column is mostly empty this can leave very few rows -- consider
# cor(..., use = "pairwise.complete.obs") on the unfiltered numeric columns.
data_clean <- na.omit(data)
# Select only numeric columns; vapply() guarantees a logical mask
numeric_data <- data_clean[vapply(data_clean, is.numeric, logical(1))]
# Compute the correlation matrix of the weather variables. Without this step
# the plots below would silently reuse whatever `cor_matrix` already exists
# in the session instead of the weather data.
cor_matrix <- cor(numeric_data)
# Replace NA / infinite entries (cor() yields NA for constant columns) with 0
# so both plots receive finite values
if (anyNA(cor_matrix) || any(is.infinite(cor_matrix))) {
  cor_matrix[is.na(cor_matrix)] <- 0 # Replace NAs with 0 (or other strategy)
  cor_matrix[is.infinite(cor_matrix)] <- 0 # Replace Inf with 0
}
# Create a heatmap using ggplot2: melt() reshapes the matrix into long form
# with one row per (Var1, Var2, value) cell
melted_cor <- melt(cor_matrix)
ggplot(data = melted_cor, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0, limit = c(-1, 1), space = "Lab",
    name = "Correlation"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  labs(title = "Correlation Heatmap of Weather Variables")
# Alternative: Use corrplot for a quick visualization (upper triangle only,
# variables reordered by hierarchical clustering)
corrplot(cor_matrix,
  method = "color", type = "upper", order = "hclust",
  col = colorRampPalette(c("blue", "white", "red"))(200)
)