library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(GGally)
## Warning: package 'GGally' was built under R version 4.4.3
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Import the crime records CSV with readr
crime_dataset_india <- readr::read_csv(
  file = "D:/CAP482_2025/PROJECT DS/crime_ds.csv"
)
## Rows: 12748 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Date of Occurrence, Time of Occurrence, City, Crime Description, V...
## dbl (5): Report Number, Crime Code, Victim Age, Police Deployed, year
## dttm (1): Date Reported
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep the import under its original name; the steps that follow work
# through `crime`.
crime <- crime_dataset_india
# View() opens a GUI data viewer, which is only useful in an interactive
# session; skip it when the script is sourced, knitted, or run via Rscript.
if (interactive()) {
  View(crime_dataset_india)
  View(crime)
}
# Cities retained for the analysis
selected_cities <- c(
  "Delhi", "Mumbai", "Bangalore", "Hyderabad", "Chennai",
  "Kolkata", "Ahmedabad", "Pune", "Lucknow", "Jaipur",
  "Patna", "Kanpur", "Surat", "Indore"
)
# Keep only the rows whose City value is one of the selected cities
crime_subset <- crime[crime$City %in% selected_cities, , drop = FALSE]
# Sample 40% of the records within each reporting year, then convert the
# remaining character timestamp columns to date-time (dttm).
# Fix the RNG seed so slice_sample() draws the same rows on every run.
set.seed(482)
crime_a <- crime_subset %>%
  mutate(
    # read_csv() already parsed `Date Reported` as dttm, so character
    # positions 7-10 are not the year; use the date-time accessor instead.
    year = lubridate::year(`Date Reported`)
  ) %>%
  group_by(year) %>%
  slice_sample(prop = 0.40) %>%
  ungroup()
# `Date Reported` is deliberately not re-parsed: running dmy_hm() on the
# already-parsed column failed with "All formats failed to parse" and
# replaced every value with NA.
# NOTE(review): the formats below (month-first for occurrence, day-first for
# the others) are taken from the original script - confirm against the CSV.
crime_a$`Date of Occurrence` <- mdy_hm(crime_a$`Date of Occurrence`)
crime_a$`Date Case Closed` <- dmy_hm(crime_a$`Date Case Closed`)
crime_a$`Time of Occurrence` <- dmy_hm(crime_a$`Time of Occurrence`)