ca 02 markdown

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readr)
library(GGally)

## Warning: package 'GGally' was built under R version 4.4.3

## Loading required package: ggplot2

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

library(ggplot2)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

# Import the raw crime records from the CSV file into a tibble
crime_dataset_india <- readr::read_csv(
  file = "D:/CAP482_2025/PROJECT DS/crime_ds.csv"
)

## Rows: 12748 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (9): Date of Occurrence, Time of Occurrence, City, Crime Description, V...
## dbl  (5): Report Number, Crime Code, Victim Age, Police Deployed, year
## dttm (1): Date Reported
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Open the raw import in the data viewer, then continue on a working copy
View(crime_dataset_india)
crime <- crime_dataset_india
View(crime)

# Cities retained for the analysis
selected_cities <- c(
  "Delhi", "Mumbai", "Bangalore", "Hyderabad", "Chennai", "Kolkata",
  "Ahmedabad", "Pune", "Lucknow", "Jaipur", "Patna", "Kanpur", "Surat",
  "Indore"
)

# Keep only the rows whose City value is one of the selected cities
# (%in% never yields NA, so rows with a missing City are dropped)
crime_subset <- crime[crime$City %in% selected_cities, ]

# Character-to-datetime (dttm) conversion and per-year sampling

# `Date Reported` is already read as dttm by read_csv() (see the column
# specification printed when the file was loaded). The reporting year is
# therefore taken with lubridate::year(); slicing characters 7-10 of the
# ISO-formatted text ("YYYY-MM-DD ...") does not return the year.
# The resulting `year` column is numeric.
crime_a <- crime_subset %>%
  mutate(
    year = lubridate::year(`Date Reported`)
  ) %>%
  group_by(year) %>%
  # Draw a 40% random sample inside each reporting year.
  # NOTE(review): the sample is not seeded; call set.seed() beforehand if the
  # result must be reproducible between runs.
  slice_sample(prop = 0.40) %>%
  ungroup()

# Parse the columns that were read as character.
# NOTE(review): `Date of Occurrence` is parsed as month-day-year while the
# other columns are parsed as day-month-year -- confirm against the raw data.
crime_a$`Date of Occurrence` <- mdy_hm(crime_a$`Date of Occurrence`)

# `Date Reported` is deliberately not re-parsed: calling dmy_hm() on a column
# that is already dttm fails for every value ("All formats failed to parse")
# and overwrites the whole column with NA.

crime_a$`Date Case Closed` <- dmy_hm(crime_a$`Date Case Closed`)
crime_a$`Time of Occurrence` <- dmy_hm(crime_a$`Time of Occurrence`)

ca 02 markdown

OM CHAUHAN

2025-04-24