In this R Notebook, I’m going to analyze the Crimes in India dataset obtained from data.world. You can download the data yourself, or you can work with it hands-on on the website itself using your favourite language (you will need an account for that).
Okay, let’s get into the analysis. The data I’m analyzing here covers 2001–2013. It is available in multiple CSV files organized by topic (children, women, minorities, etc.) and by period (2001–2012, 2013 and 2014). For this analysis, I am going to use District Wise Crimes Committed as per IPC. I have to combine the data because it is split across two files, one for 2001–2012 and one for 2013. Let’s go.
library(ggplot2)
library(data.table)
library(highcharter)
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the two district-wise IPC extracts (2001-2012 and 2013).
# TRUE/FALSE are spelled out because T and F are ordinary variables that can be
# reassigned. Strings stay character (not factor) so they can be cleaned before
# being converted, and empty fields are read as NA via na.strings = "".
data2012 <- read.csv(
  "01_District_wise_crimes_committed_IPC_2001_2012.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  na.strings = ""
)
data2013 <- read.csv(
  "01_District_wise_crimes_committed_IPC_2013.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  na.strings = ""
)
Let’s look at the data
# Print the dimensions (rows, columns) of each extract.
dim(data2012)
## [1] 9017 33
dim(data2013)
## [1] 823 33
# Print the column names, types and leading values of the 2001-2012 extract.
str(data2012)
## 'data.frame': 9017 obs. of 33 variables:
## $ STATE.UT : chr "ANDHRA PRADESH" "ANDHRA PRADESH" "ANDHRA PRADESH" "ANDHRA PRADESH" ...
## $ DISTRICT : chr "ADILABAD" "ANANTAPUR" "CHITTOOR" "CUDDAPAH" ...
## $ YEAR : int 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
## $ MURDER : int 101 151 101 80 82 3 182 111 162 93 ...
## $ ATTEMPT.TO.MURDER : int 60 125 57 53 67 1 88 113 85 60 ...
## $ CULPABLE.HOMICIDE.NOT.AMOUNTING.TO.MURDER : int 17 1 2 1 1 0 2 7 6 1 ...
## $ RAPE : int 50 23 27 20 23 0 54 37 56 47 ...
## $ CUSTODIAL.RAPE : int 0 0 0 0 0 0 0 0 0 0 ...
## $ OTHER.RAPE : int 50 23 27 20 23 0 54 37 56 47 ...
## $ KIDNAPPING...ABDUCTION : int 46 53 59 25 49 0 82 80 67 41 ...
## $ KIDNAPPING.AND.ABDUCTION.OF.WOMEN.AND.GIRLS : int 30 30 34 20 26 0 51 39 49 30 ...
## $ KIDNAPPING.AND.ABDUCTION.OF.OTHERS : int 16 23 25 5 23 0 31 41 18 11 ...
## $ DACOITY : int 9 8 4 1 4 5 16 13 27 1 ...
## $ PREPARATION.AND.ASSEMBLY.FOR.DACOITY : int 0 0 0 0 0 0 3 0 1 0 ...
## $ ROBBERY : int 41 16 14 4 25 2 59 67 50 13 ...
## $ BURGLARY : int 198 191 237 98 437 0 338 1155 218 172 ...
## $ THEFT : int 199 366 723 173 1021 162 1122 2792 392 368 ...
## $ AUTO.THEFT : int 22 57 164 36 150 0 171 1128 54 34 ...
## $ OTHER.THEFT : int 177 309 559 137 871 162 951 1664 338 334 ...
## $ RIOTS : int 78 168 156 164 70 1 244 65 220 153 ...
## $ CRIMINAL.BREACH.OF.TRUST : int 16 11 33 12 50 0 67 101 25 35 ...
## $ CHEATING : int 104 65 209 37 220 0 300 1293 243 130 ...
## $ COUNTERFIETING : int 1 8 9 2 3 3 8 24 5 5 ...
## $ ARSON : int 30 69 38 23 41 0 43 0 33 73 ...
## $ HURT.GREVIOUS.HURT : int 1131 1543 2088 795 1244 1 1792 3137 1392 1026 ...
## $ DOWRY.DEATHS : int 16 7 14 17 12 0 7 24 62 17 ...
## $ ASSAULT.ON.WOMEN.WITH.INTENT.TO.OUTRAGE.HER.MODESTY: int 149 118 112 126 109 1 139 118 414 180 ...
## $ INSULT.TO.MODESTY.OF.WOMEN : int 34 24 83 38 58 0 129 27 81 336 ...
## $ CRUELTY.BY.HUSBAND.OR.HIS.RELATIVES : int 175 154 186 57 247 0 378 746 224 172 ...
## $ IMPORTATION.OF.GIRLS.FROM.FOREIGN.COUNTRIES : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CAUSING.DEATH.BY.NEGLIGENCE : int 181 270 404 233 431 4 369 409 322 209 ...
## $ OTHER.IPC.CRIMES : int 1518 754 1262 1181 2313 104 2426 1512 1726 1450 ...
## $ TOTAL.IPC.CRIMES : int 4154 4125 5818 3140 6507 287 7848 11831 5811 4582 ...
# Print the column names, types and leading values of the 2013 extract.
str(data2013)
## 'data.frame': 823 obs. of 33 variables:
## $ STATE.UT : chr "Andhra Pradesh" "Andhra Pradesh" "Andhra Pradesh" "Andhra Pradesh" ...
## $ DISTRICT : chr "ADILABAD" "ANANTAPUR" "CHITTOOR" "CUDDAPAH" ...
## $ YEAR : int 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
## $ MURDER : int 96 156 72 93 162 68 2 110 44 120 ...
## $ ATTEMPT.TO.MURDER : int 72 149 61 107 123 71 0 87 51 145 ...
## $ CULPABLE.HOMICIDE.NOT.AMOUNTING.TO.MURDER : int 13 3 2 7 16 6 0 1 3 5 ...
## $ RAPE : int 61 28 31 19 138 74 0 38 28 101 ...
## $ CUSTODIAL.RAPE : int 0 0 0 0 0 0 0 0 0 0 ...
## $ OTHER.RAPE : int 61 28 31 19 138 74 0 38 28 101 ...
## $ KIDNAPPING...ABDUCTION : int 65 110 52 84 192 63 0 61 46 131 ...
## $ KIDNAPPING.AND.ABDUCTION.OF.WOMEN.AND.GIRLS : int 47 84 27 50 129 33 0 54 34 52 ...
## $ KIDNAPPING.AND.ABDUCTION.OF.OTHERS : int 18 26 25 34 63 30 0 7 12 79 ...
## $ DACOITY : int 2 5 3 2 15 3 0 4 2 6 ...
## $ PREPARATION.AND.ASSEMBLY.FOR.DACOITY : int 0 0 0 0 0 0 0 0 1 0 ...
## $ ROBBERY : int 14 23 11 9 89 15 0 21 26 47 ...
## $ BURGLARY : int 274 279 157 220 1318 326 0 192 190 658 ...
## $ THEFT : int 377 597 512 702 4779 788 251 675 991 4166 ...
## $ AUTO.THEFT : int 86 154 158 255 1761 310 0 200 408 1353 ...
## $ OTHER.THEFT : int 291 443 354 447 3018 478 251 475 583 2813 ...
## $ RIOTS : int 58 56 57 156 34 50 0 41 11 118 ...
## $ CRIMINAL.BREACH.OF.TRUST : int 93 5 17 81 179 86 0 41 15 75 ...
## $ CHEATING : int 254 160 238 317 2111 222 3 305 372 2382 ...
## $ COUNTERFIETING : int 1 5 6 5 12 7 4 11 5 47 ...
## $ ARSON : int 30 29 18 34 40 21 1 21 14 62 ...
## $ HURT.GREVIOUS.HURT : int 2394 2537 937 2310 4284 2300 11 2345 1180 3092 ...
## $ DOWRY.DEATHS : int 12 23 13 9 43 15 0 16 7 39 ...
## $ ASSAULT.ON.WOMEN.WITH.INTENT.TO.OUTRAGE.HER.MODESTY: int 197 337 119 318 350 352 3 296 124 225 ...
## $ INSULT.TO.MODESTY.OF.WOMEN : int 138 43 84 163 338 222 2 135 93 90 ...
## $ CRUELTY.BY.HUSBAND.OR.HIS.RELATIVES : int 464 161 435 207 1526 483 0 608 326 1480 ...
## $ IMPORTATION.OF.GIRLS.FROM.FOREIGN.COUNTRIES : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CAUSING.DEATH.BY.NEGLIGENCE : int 376 573 546 464 1104 525 0 449 226 527 ...
## $ OTHER.IPC.CRIMES : int 1390 1634 2239 1741 3139 1082 24 1569 1123 2839 ...
## $ TOTAL.IPC.CRIMES : int 6381 6913 5610 7048 19992 6779 301 7026 4878 16355 ...
Let’s combine the data.
# Stack the 2013 rows underneath the 2001-2012 rows into one data frame.
data <- rbind(data2012,data2013)
dim(data)
## [1] 9840 33
# Tabulate is.na() over every cell: counts of missing (TRUE) vs present (FALSE).
table(is.na(data))
##
## FALSE
## 324720
We can see there are no missing values in the data. That’s good. Let’s analyze.
# Preview the first ten rows of the combined data.
head(data, 10)
## STATE.UT DISTRICT YEAR MURDER ATTEMPT.TO.MURDER
## 1 ANDHRA PRADESH ADILABAD 2001 101 60
## 2 ANDHRA PRADESH ANANTAPUR 2001 151 125
## 3 ANDHRA PRADESH CHITTOOR 2001 101 57
## 4 ANDHRA PRADESH CUDDAPAH 2001 80 53
## 5 ANDHRA PRADESH EAST GODAVARI 2001 82 67
## 6 ANDHRA PRADESH GUNTAKAL RLY. 2001 3 1
## 7 ANDHRA PRADESH GUNTUR 2001 182 88
## 8 ANDHRA PRADESH HYDERABAD CITY 2001 111 113
## 9 ANDHRA PRADESH KARIMNAGAR 2001 162 85
## 10 ANDHRA PRADESH KHAMMAM 2001 93 60
## CULPABLE.HOMICIDE.NOT.AMOUNTING.TO.MURDER RAPE CUSTODIAL.RAPE
## 1 17 50 0
## 2 1 23 0
## 3 2 27 0
## 4 1 20 0
## 5 1 23 0
## 6 0 0 0
## 7 2 54 0
## 8 7 37 0
## 9 6 56 0
## 10 1 47 0
## OTHER.RAPE KIDNAPPING...ABDUCTION
## 1 50 46
## 2 23 53
## 3 27 59
## 4 20 25
## 5 23 49
## 6 0 0
## 7 54 82
## 8 37 80
## 9 56 67
## 10 47 41
## KIDNAPPING.AND.ABDUCTION.OF.WOMEN.AND.GIRLS
## 1 30
## 2 30
## 3 34
## 4 20
## 5 26
## 6 0
## 7 51
## 8 39
## 9 49
## 10 30
## KIDNAPPING.AND.ABDUCTION.OF.OTHERS DACOITY
## 1 16 9
## 2 23 8
## 3 25 4
## 4 5 1
## 5 23 4
## 6 0 5
## 7 31 16
## 8 41 13
## 9 18 27
## 10 11 1
## PREPARATION.AND.ASSEMBLY.FOR.DACOITY ROBBERY BURGLARY THEFT AUTO.THEFT
## 1 0 41 198 199 22
## 2 0 16 191 366 57
## 3 0 14 237 723 164
## 4 0 4 98 173 36
## 5 0 25 437 1021 150
## 6 0 2 0 162 0
## 7 3 59 338 1122 171
## 8 0 67 1155 2792 1128
## 9 1 50 218 392 54
## 10 0 13 172 368 34
## OTHER.THEFT RIOTS CRIMINAL.BREACH.OF.TRUST CHEATING COUNTERFIETING
## 1 177 78 16 104 1
## 2 309 168 11 65 8
## 3 559 156 33 209 9
## 4 137 164 12 37 2
## 5 871 70 50 220 3
## 6 162 1 0 0 3
## 7 951 244 67 300 8
## 8 1664 65 101 1293 24
## 9 338 220 25 243 5
## 10 334 153 35 130 5
## ARSON HURT.GREVIOUS.HURT DOWRY.DEATHS
## 1 30 1131 16
## 2 69 1543 7
## 3 38 2088 14
## 4 23 795 17
## 5 41 1244 12
## 6 0 1 0
## 7 43 1792 7
## 8 0 3137 24
## 9 33 1392 62
## 10 73 1026 17
## ASSAULT.ON.WOMEN.WITH.INTENT.TO.OUTRAGE.HER.MODESTY
## 1 149
## 2 118
## 3 112
## 4 126
## 5 109
## 6 1
## 7 139
## 8 118
## 9 414
## 10 180
## INSULT.TO.MODESTY.OF.WOMEN CRUELTY.BY.HUSBAND.OR.HIS.RELATIVES
## 1 34 175
## 2 24 154
## 3 83 186
## 4 38 57
## 5 58 247
## 6 0 0
## 7 129 378
## 8 27 746
## 9 81 224
## 10 336 172
## IMPORTATION.OF.GIRLS.FROM.FOREIGN.COUNTRIES CAUSING.DEATH.BY.NEGLIGENCE
## 1 0 181
## 2 0 270
## 3 0 404
## 4 0 233
## 5 0 431
## 6 0 4
## 7 0 369
## 8 0 409
## 9 0 322
## 10 0 209
## OTHER.IPC.CRIMES TOTAL.IPC.CRIMES
## 1 1518 4154
## 2 754 4125
## 3 1262 5818
## 4 1181 3140
## 5 2313 6507
## 6 104 287
## 7 2426 7848
## 8 1512 11831
## 9 1726 5811
## 10 1450 4582
# Normalise the two label columns so the 2001-2012 rows (upper case) and the
# 2013 rows (title case) share one spelling. Only the character columns are
# upper-cased; applying toupper() to every column would turn the integer crime
# counts into strings and force a cast back.
data$STATE.UT <- toupper(data$STATE.UT)
data$DISTRICT <- toupper(data$DISTRICT)
# Merge the "A&N ISLANDS" spelling into "A & N ISLANDS".
# NOTE(review): other spelling variants may remain - check levels(data$STATE.UT).
data$STATE.UT[data$STATE.UT == "A&N ISLANDS"] <- "A & N ISLANDS"
data$STATE.UT <- as.factor(data$STATE.UT)
data$DISTRICT <- as.factor(data$DISTRICT)
# Keep YEAR and every crime-count column (columns 3 to 33) as integer.
data[3:33] <- lapply(data[3:33], as.integer)
str(data)
## 'data.frame': 9840 obs. of 33 variables:
## $ STATE.UT : Factor w/ 36 levels "A & N ISLANDS",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ DISTRICT : Factor w/ 828 levels "24 PARGANAS NORTH",..: 4 32 158 176 226 285 286 314 399 413 ...
## $ YEAR : int 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
## $ MURDER : int 101 151 101 80 82 3 182 111 162 93 ...
## $ ATTEMPT.TO.MURDER : int 60 125 57 53 67 1 88 113 85 60 ...
## $ CULPABLE.HOMICIDE.NOT.AMOUNTING.TO.MURDER : int 17 1 2 1 1 0 2 7 6 1 ...
## $ RAPE : int 50 23 27 20 23 0 54 37 56 47 ...
## $ CUSTODIAL.RAPE : int 0 0 0 0 0 0 0 0 0 0 ...
## $ OTHER.RAPE : int 50 23 27 20 23 0 54 37 56 47 ...
## $ KIDNAPPING...ABDUCTION : int 46 53 59 25 49 0 82 80 67 41 ...
## $ KIDNAPPING.AND.ABDUCTION.OF.WOMEN.AND.GIRLS : int 30 30 34 20 26 0 51 39 49 30 ...
## $ KIDNAPPING.AND.ABDUCTION.OF.OTHERS : int 16 23 25 5 23 0 31 41 18 11 ...
## $ DACOITY : int 9 8 4 1 4 5 16 13 27 1 ...
## $ PREPARATION.AND.ASSEMBLY.FOR.DACOITY : int 0 0 0 0 0 0 3 0 1 0 ...
## $ ROBBERY : int 41 16 14 4 25 2 59 67 50 13 ...
## $ BURGLARY : int 198 191 237 98 437 0 338 1155 218 172 ...
## $ THEFT : int 199 366 723 173 1021 162 1122 2792 392 368 ...
## $ AUTO.THEFT : int 22 57 164 36 150 0 171 1128 54 34 ...
## $ OTHER.THEFT : int 177 309 559 137 871 162 951 1664 338 334 ...
## $ RIOTS : int 78 168 156 164 70 1 244 65 220 153 ...
## $ CRIMINAL.BREACH.OF.TRUST : int 16 11 33 12 50 0 67 101 25 35 ...
## $ CHEATING : int 104 65 209 37 220 0 300 1293 243 130 ...
## $ COUNTERFIETING : int 1 8 9 2 3 3 8 24 5 5 ...
## $ ARSON : int 30 69 38 23 41 0 43 0 33 73 ...
## $ HURT.GREVIOUS.HURT : int 1131 1543 2088 795 1244 1 1792 3137 1392 1026 ...
## $ DOWRY.DEATHS : int 16 7 14 17 12 0 7 24 62 17 ...
## $ ASSAULT.ON.WOMEN.WITH.INTENT.TO.OUTRAGE.HER.MODESTY: int 149 118 112 126 109 1 139 118 414 180 ...
## $ INSULT.TO.MODESTY.OF.WOMEN : int 34 24 83 38 58 0 129 27 81 336 ...
## $ CRUELTY.BY.HUSBAND.OR.HIS.RELATIVES : int 175 154 186 57 247 0 378 746 224 172 ...
## $ IMPORTATION.OF.GIRLS.FROM.FOREIGN.COUNTRIES : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CAUSING.DEATH.BY.NEGLIGENCE : int 181 270 404 233 431 4 369 409 322 209 ...
## $ OTHER.IPC.CRIMES : int 1518 754 1262 1181 2313 104 2426 1512 1726 1450 ...
## $ TOTAL.IPC.CRIMES : int 4154 4125 5818 3140 6507 287 7848 11831 5811 4582 ...
# Total IPC crimes per State/UT and year, one row for every state x year pair.
# A single tapply() replaces growing a data frame with rbind() inside a double
# loop, and avoids the sentinel "dummy" row that left a stray factor level.
year <- 2001:2013
states <- levels(data$STATE.UT)
# Rows = years, columns = states; pairs with no districts get a total of 0.
totals <- tapply(
  data$TOTAL.IPC.CRIMES,
  list(factor(data$YEAR, levels = year), factor(data$STATE.UT, levels = states)),
  sum,
  default = 0
)
# as.vector() walks the matrix column by column, i.e. state by state with the
# years in order inside each state - the same row order the loops produced.
df <- data.frame(
  STATE.UT = factor(rep(states, each = length(year)), levels = states),
  YEAR = as.numeric(rep(year, times = length(states))),
  TOTAL.IPC.CRIMES = as.numeric(as.vector(totals))
)
str(df)
## 'data.frame': 468 obs. of 3 variables:
## $ STATE.UT : Factor w/ 36 levels "A & N ISLANDS",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ YEAR : num 2001 2002 2003 2004 2005 ...
## $ TOTAL.IPC.CRIMES: num 1316 1216 1288 1496 1364 ...
#for(i in 1:36) {
# ts <- ts(df[df$STATE.UT==states[i],]$TOTAL.IPC.CRIMES,start=2001, frequency = 12)
# hc <- hchart(ts, name = "Crimes") %>%
# hc_add_theme(hc_theme_darkunica()) %>%
# hc_credits(enabled = TRUE, text = "Sources: DATA.WORLD", style = list(fontSize = "12px")) %>%
# hc_title(text = states[i]) %>%
# hc_legend(enabled = TRUE)
# print(hc)%>% hw_grid(rowheight=250,ncol=2)
#}
# One small line chart of total IPC crimes per State/UT, in a 3-column grid.
# `df` holds one total per year starting in 2001, so the series is declared
# annual (frequency = 1). With frequency = 12 the yearly values were treated as
# consecutive months starting January 2001, mislabelling the time axis.
lapply(states, function(x) {
  ts(df[df$STATE.UT == x, ]$TOTAL.IPC.CRIMES, start = 2001, frequency = 1) %>%
    hchart(showInLegend = FALSE) %>%
    hc_add_theme(hc_theme_smpl()) %>%
    hc_title(text = x) %>%
    hc_yAxis(title = list(text = ""))
}) %>%
  hw_grid(rowheight = 225, ncol = 3)
# Horizontal bar chart comparing each State/UT's total IPC crimes in 2001 and
# 2013. `df` is ordered by state, so the per-year vectors line up with `states`.
hc_opts <- list()
hc_opts$chart <- list(type = "bar")
# The chart title lives under `text`; the previous `title = ...` key was
# ignored, so no title was shown. The bars are grouped, not stacked, so the
# placeholder wording is replaced with a description of what is plotted.
hc_opts$title <- list(text = "Total IPC crimes by State/UT: 2001 vs 2013")
hc_opts$xAxis <- list(categories = states)
hc_opts$yAxis <- list(min = 0, title = list(text = 'Crime Increase from 2001 to 2013'))
hc_opts$legend <- list(reversed = TRUE)
hc_opts$series <- list(
  list(name = "2001", data = df$TOTAL.IPC.CRIMES[df$YEAR == 2001]),
  list(name = "2013", data = df$TOTAL.IPC.CRIMES[df$YEAR == 2013])
)
highchart(hc_opts, theme = hc_theme_sandsignika())
# Draw the 2001 vs 2013 comparison twice - once for high-volume States/UTs and
# once for the rest - so the small ones are not flattened by the large ones.
#
# The split is made per STATE, not per state-year row. Filtering rows on the
# threshold dropped one of the two years for any state that crossed 30000
# between 2001 and 2013, which left the series shorter than the category list
# and shifted bars onto the wrong states.
high_states <- unique(as.character(df$STATE.UT[df$TOTAL.IPC.CRIMES > 30000]))
# df1: every year of the states that exceeded 30000 in at least one year.
df1 <- dplyr::filter(df, STATE.UT %in% high_states)
# df2: every year of the remaining states (never above 30000).
df2 <- dplyr::filter(df, !STATE.UT %in% high_states)

hc_opts <- list()
hc_opts$chart <- list(type = "bar")
hc_opts$yAxis <- list(min = 0, title = list(text = 'Crime Increase from 2001 to 2013'))
hc_opts$legend <- list(reversed = TRUE)

# Chart 1: high-volume states. The title must be given under `text`; a
# `title = ...` key is ignored and no title is rendered.
hc_opts$title <- list(
  text = "Total IPC crimes, 2001 vs 2013: States/UTs above 30,000 in any year"
)
hc_opts$xAxis <- list(categories = unique(as.character(df1$STATE.UT)))
hc_opts$series <- list(
  list(name = "2001", data = df1$TOTAL.IPC.CRIMES[df1$YEAR == 2001]),
  list(name = "2013", data = df1$TOTAL.IPC.CRIMES[df1$YEAR == 2013])
)
highchart(hc_opts, theme = hc_theme_sandsignika())

# Chart 2: the remaining states, reusing the same options.
hc_opts$title <- list(
  text = "Total IPC crimes, 2001 vs 2013: States/UTs never above 30,000"
)
hc_opts$xAxis <- list(categories = unique(as.character(df2$STATE.UT)))
hc_opts$series <- list(
  list(name = "2001", data = df2$TOTAL.IPC.CRIMES[df2$YEAR == 2001]),
  list(name = "2013", data = df2$TOTAL.IPC.CRIMES[df2$YEAR == 2013])
)
highchart(hc_opts, theme = hc_theme_sandsignika())