Introduction

In this R Notebook, I’m going to analyze the Crimes in India dataset obtained from data.world. You can download the data yourself, or work with it hands-on on the website itself using your favourite language (you will need an account for that).

Okay, let’s get into analysis. The data I’m analyzing here is from 2001-2013. The data is available in multiple csv files based on the topic (like children, women, minority etc.) and year (2001-2012, 2013 and 2014). For this analysis, I am going to use District Wise Crimes Committed as per IPC. I have to combine the data as it is available in 2 files for 2001-2012 and 2013. Let’s go.

Loading Required Libraries

library(ggplot2)
library(data.table)
library(highcharter)
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(magrittr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Reading the data

# Read the two district-wise IPC crime files (2001-2012 and 2013).
# Empty fields are read as NA and text columns stay character rather than
# factor. TRUE/FALSE are spelled out because T and F are ordinary variables
# that can be reassigned.
data2012 <- read.csv("01_District_wise_crimes_committed_IPC_2001_2012.csv", header = TRUE, stringsAsFactors = FALSE, na.strings = "")
data2013 <- read.csv("01_District_wise_crimes_committed_IPC_2013.csv", header = TRUE, stringsAsFactors = FALSE, na.strings = "")

Let’s look at the data

# Compare the shape of the two files before stacking them.
dim(data2012)
## [1] 9017   33
dim(data2013)
## [1] 823  33
# List the column names and types of the 2001-2012 file.
str(data2012)
## 'data.frame':    9017 obs. of  33 variables:
##  $ STATE.UT                                           : chr  "ANDHRA PRADESH" "ANDHRA PRADESH" "ANDHRA PRADESH" "ANDHRA PRADESH" ...
##  $ DISTRICT                                           : chr  "ADILABAD" "ANANTAPUR" "CHITTOOR" "CUDDAPAH" ...
##  $ YEAR                                               : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ MURDER                                             : int  101 151 101 80 82 3 182 111 162 93 ...
##  $ ATTEMPT.TO.MURDER                                  : int  60 125 57 53 67 1 88 113 85 60 ...
##  $ CULPABLE.HOMICIDE.NOT.AMOUNTING.TO.MURDER          : int  17 1 2 1 1 0 2 7 6 1 ...
##  $ RAPE                                               : int  50 23 27 20 23 0 54 37 56 47 ...
##  $ CUSTODIAL.RAPE                                     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ OTHER.RAPE                                         : int  50 23 27 20 23 0 54 37 56 47 ...
##  $ KIDNAPPING...ABDUCTION                             : int  46 53 59 25 49 0 82 80 67 41 ...
##  $ KIDNAPPING.AND.ABDUCTION.OF.WOMEN.AND.GIRLS        : int  30 30 34 20 26 0 51 39 49 30 ...
##  $ KIDNAPPING.AND.ABDUCTION.OF.OTHERS                 : int  16 23 25 5 23 0 31 41 18 11 ...
##  $ DACOITY                                            : int  9 8 4 1 4 5 16 13 27 1 ...
##  $ PREPARATION.AND.ASSEMBLY.FOR.DACOITY               : int  0 0 0 0 0 0 3 0 1 0 ...
##  $ ROBBERY                                            : int  41 16 14 4 25 2 59 67 50 13 ...
##  $ BURGLARY                                           : int  198 191 237 98 437 0 338 1155 218 172 ...
##  $ THEFT                                              : int  199 366 723 173 1021 162 1122 2792 392 368 ...
##  $ AUTO.THEFT                                         : int  22 57 164 36 150 0 171 1128 54 34 ...
##  $ OTHER.THEFT                                        : int  177 309 559 137 871 162 951 1664 338 334 ...
##  $ RIOTS                                              : int  78 168 156 164 70 1 244 65 220 153 ...
##  $ CRIMINAL.BREACH.OF.TRUST                           : int  16 11 33 12 50 0 67 101 25 35 ...
##  $ CHEATING                                           : int  104 65 209 37 220 0 300 1293 243 130 ...
##  $ COUNTERFIETING                                     : int  1 8 9 2 3 3 8 24 5 5 ...
##  $ ARSON                                              : int  30 69 38 23 41 0 43 0 33 73 ...
##  $ HURT.GREVIOUS.HURT                                 : int  1131 1543 2088 795 1244 1 1792 3137 1392 1026 ...
##  $ DOWRY.DEATHS                                       : int  16 7 14 17 12 0 7 24 62 17 ...
##  $ ASSAULT.ON.WOMEN.WITH.INTENT.TO.OUTRAGE.HER.MODESTY: int  149 118 112 126 109 1 139 118 414 180 ...
##  $ INSULT.TO.MODESTY.OF.WOMEN                         : int  34 24 83 38 58 0 129 27 81 336 ...
##  $ CRUELTY.BY.HUSBAND.OR.HIS.RELATIVES                : int  175 154 186 57 247 0 378 746 224 172 ...
##  $ IMPORTATION.OF.GIRLS.FROM.FOREIGN.COUNTRIES        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CAUSING.DEATH.BY.NEGLIGENCE                        : int  181 270 404 233 431 4 369 409 322 209 ...
##  $ OTHER.IPC.CRIMES                                   : int  1518 754 1262 1181 2313 104 2426 1512 1726 1450 ...
##  $ TOTAL.IPC.CRIMES                                   : int  4154 4125 5818 3140 6507 287 7848 11831 5811 4582 ...
# Same structure check for the 2013 file, to compare against the 2001-2012 one.
str(data2013)
## 'data.frame':    823 obs. of  33 variables:
##  $ STATE.UT                                           : chr  "Andhra Pradesh" "Andhra Pradesh" "Andhra Pradesh" "Andhra Pradesh" ...
##  $ DISTRICT                                           : chr  "ADILABAD" "ANANTAPUR" "CHITTOOR" "CUDDAPAH" ...
##  $ YEAR                                               : int  2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
##  $ MURDER                                             : int  96 156 72 93 162 68 2 110 44 120 ...
##  $ ATTEMPT.TO.MURDER                                  : int  72 149 61 107 123 71 0 87 51 145 ...
##  $ CULPABLE.HOMICIDE.NOT.AMOUNTING.TO.MURDER          : int  13 3 2 7 16 6 0 1 3 5 ...
##  $ RAPE                                               : int  61 28 31 19 138 74 0 38 28 101 ...
##  $ CUSTODIAL.RAPE                                     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ OTHER.RAPE                                         : int  61 28 31 19 138 74 0 38 28 101 ...
##  $ KIDNAPPING...ABDUCTION                             : int  65 110 52 84 192 63 0 61 46 131 ...
##  $ KIDNAPPING.AND.ABDUCTION.OF.WOMEN.AND.GIRLS        : int  47 84 27 50 129 33 0 54 34 52 ...
##  $ KIDNAPPING.AND.ABDUCTION.OF.OTHERS                 : int  18 26 25 34 63 30 0 7 12 79 ...
##  $ DACOITY                                            : int  2 5 3 2 15 3 0 4 2 6 ...
##  $ PREPARATION.AND.ASSEMBLY.FOR.DACOITY               : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ ROBBERY                                            : int  14 23 11 9 89 15 0 21 26 47 ...
##  $ BURGLARY                                           : int  274 279 157 220 1318 326 0 192 190 658 ...
##  $ THEFT                                              : int  377 597 512 702 4779 788 251 675 991 4166 ...
##  $ AUTO.THEFT                                         : int  86 154 158 255 1761 310 0 200 408 1353 ...
##  $ OTHER.THEFT                                        : int  291 443 354 447 3018 478 251 475 583 2813 ...
##  $ RIOTS                                              : int  58 56 57 156 34 50 0 41 11 118 ...
##  $ CRIMINAL.BREACH.OF.TRUST                           : int  93 5 17 81 179 86 0 41 15 75 ...
##  $ CHEATING                                           : int  254 160 238 317 2111 222 3 305 372 2382 ...
##  $ COUNTERFIETING                                     : int  1 5 6 5 12 7 4 11 5 47 ...
##  $ ARSON                                              : int  30 29 18 34 40 21 1 21 14 62 ...
##  $ HURT.GREVIOUS.HURT                                 : int  2394 2537 937 2310 4284 2300 11 2345 1180 3092 ...
##  $ DOWRY.DEATHS                                       : int  12 23 13 9 43 15 0 16 7 39 ...
##  $ ASSAULT.ON.WOMEN.WITH.INTENT.TO.OUTRAGE.HER.MODESTY: int  197 337 119 318 350 352 3 296 124 225 ...
##  $ INSULT.TO.MODESTY.OF.WOMEN                         : int  138 43 84 163 338 222 2 135 93 90 ...
##  $ CRUELTY.BY.HUSBAND.OR.HIS.RELATIVES                : int  464 161 435 207 1526 483 0 608 326 1480 ...
##  $ IMPORTATION.OF.GIRLS.FROM.FOREIGN.COUNTRIES        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CAUSING.DEATH.BY.NEGLIGENCE                        : int  376 573 546 464 1104 525 0 449 226 527 ...
##  $ OTHER.IPC.CRIMES                                   : int  1390 1634 2239 1741 3139 1082 24 1569 1123 2839 ...
##  $ TOTAL.IPC.CRIMES                                   : int  6381 6913 5610 7048 19992 6779 301 7026 4878 16355 ...

Let’s combine the data.

# Stack the 2001-2012 rows and the 2013 rows into a single data frame.
data <- rbind(data2012,data2013)
dim(data)
## [1] 9840   33
# Tabulate NA vs non-NA cells over the whole data frame.
table(is.na(data))
## 
##  FALSE 
## 324720

We can see there are no missing values in the data. That’s good. Let’s analyze.

Analysis

# Preview the first 10 rows of the combined data.
head(data, 10)
##          STATE.UT       DISTRICT YEAR MURDER ATTEMPT.TO.MURDER
## 1  ANDHRA PRADESH       ADILABAD 2001    101                60
## 2  ANDHRA PRADESH      ANANTAPUR 2001    151               125
## 3  ANDHRA PRADESH       CHITTOOR 2001    101                57
## 4  ANDHRA PRADESH       CUDDAPAH 2001     80                53
## 5  ANDHRA PRADESH  EAST GODAVARI 2001     82                67
## 6  ANDHRA PRADESH  GUNTAKAL RLY. 2001      3                 1
## 7  ANDHRA PRADESH         GUNTUR 2001    182                88
## 8  ANDHRA PRADESH HYDERABAD CITY 2001    111               113
## 9  ANDHRA PRADESH     KARIMNAGAR 2001    162                85
## 10 ANDHRA PRADESH        KHAMMAM 2001     93                60
##    CULPABLE.HOMICIDE.NOT.AMOUNTING.TO.MURDER RAPE CUSTODIAL.RAPE
## 1                                         17   50              0
## 2                                          1   23              0
## 3                                          2   27              0
## 4                                          1   20              0
## 5                                          1   23              0
## 6                                          0    0              0
## 7                                          2   54              0
## 8                                          7   37              0
## 9                                          6   56              0
## 10                                         1   47              0
##    OTHER.RAPE KIDNAPPING...ABDUCTION
## 1          50                     46
## 2          23                     53
## 3          27                     59
## 4          20                     25
## 5          23                     49
## 6           0                      0
## 7          54                     82
## 8          37                     80
## 9          56                     67
## 10         47                     41
##    KIDNAPPING.AND.ABDUCTION.OF.WOMEN.AND.GIRLS
## 1                                           30
## 2                                           30
## 3                                           34
## 4                                           20
## 5                                           26
## 6                                            0
## 7                                           51
## 8                                           39
## 9                                           49
## 10                                          30
##    KIDNAPPING.AND.ABDUCTION.OF.OTHERS DACOITY
## 1                                  16       9
## 2                                  23       8
## 3                                  25       4
## 4                                   5       1
## 5                                  23       4
## 6                                   0       5
## 7                                  31      16
## 8                                  41      13
## 9                                  18      27
## 10                                 11       1
##    PREPARATION.AND.ASSEMBLY.FOR.DACOITY ROBBERY BURGLARY THEFT AUTO.THEFT
## 1                                     0      41      198   199         22
## 2                                     0      16      191   366         57
## 3                                     0      14      237   723        164
## 4                                     0       4       98   173         36
## 5                                     0      25      437  1021        150
## 6                                     0       2        0   162          0
## 7                                     3      59      338  1122        171
## 8                                     0      67     1155  2792       1128
## 9                                     1      50      218   392         54
## 10                                    0      13      172   368         34
##    OTHER.THEFT RIOTS CRIMINAL.BREACH.OF.TRUST CHEATING COUNTERFIETING
## 1          177    78                       16      104              1
## 2          309   168                       11       65              8
## 3          559   156                       33      209              9
## 4          137   164                       12       37              2
## 5          871    70                       50      220              3
## 6          162     1                        0        0              3
## 7          951   244                       67      300              8
## 8         1664    65                      101     1293             24
## 9          338   220                       25      243              5
## 10         334   153                       35      130              5
##    ARSON HURT.GREVIOUS.HURT DOWRY.DEATHS
## 1     30               1131           16
## 2     69               1543            7
## 3     38               2088           14
## 4     23                795           17
## 5     41               1244           12
## 6      0                  1            0
## 7     43               1792            7
## 8      0               3137           24
## 9     33               1392           62
## 10    73               1026           17
##    ASSAULT.ON.WOMEN.WITH.INTENT.TO.OUTRAGE.HER.MODESTY
## 1                                                  149
## 2                                                  118
## 3                                                  112
## 4                                                  126
## 5                                                  109
## 6                                                    1
## 7                                                  139
## 8                                                  118
## 9                                                  414
## 10                                                 180
##    INSULT.TO.MODESTY.OF.WOMEN CRUELTY.BY.HUSBAND.OR.HIS.RELATIVES
## 1                          34                                 175
## 2                          24                                 154
## 3                          83                                 186
## 4                          38                                  57
## 5                          58                                 247
## 6                           0                                   0
## 7                         129                                 378
## 8                          27                                 746
## 9                          81                                 224
## 10                        336                                 172
##    IMPORTATION.OF.GIRLS.FROM.FOREIGN.COUNTRIES CAUSING.DEATH.BY.NEGLIGENCE
## 1                                            0                         181
## 2                                            0                         270
## 3                                            0                         404
## 4                                            0                         233
## 5                                            0                         431
## 6                                            0                           4
## 7                                            0                         369
## 8                                            0                         409
## 9                                            0                         322
## 10                                           0                         209
##    OTHER.IPC.CRIMES TOTAL.IPC.CRIMES
## 1              1518             4154
## 2               754             4125
## 3              1262             5818
## 4              1181             3140
## 5              2313             6507
## 6               104              287
## 7              2426             7848
## 8              1512            11831
## 9              1726             5811
## 10             1450             4582
# Normalise the text columns so both source files agree: the 2001-2012 file
# spells states in upper case ("ANDHRA PRADESH") while the 2013 file uses
# title case ("Andhra Pradesh").
# Only character columns are upper-cased; the count columns are left alone
# instead of being round-tripped through character.
text_cols <- vapply(data, is.character, logical(1))
data[text_cols] <- lapply(data[text_cols], toupper)

# Merge the two spellings of the same union territory.
data[data$STATE.UT == "A&N ISLANDS", "STATE.UT"] <- "A & N ISLANDS"

data$STATE.UT <- as.factor(data$STATE.UT)
data$DISTRICT <- as.factor(data$DISTRICT)

# Columns 3-33 are YEAR and the crime counts; make sure they are integers.
data[3:33] <- lapply(data[3:33], as.integer)
str(data)
## 'data.frame':    9840 obs. of  33 variables:
##  $ STATE.UT                                           : Factor w/ 36 levels "A & N ISLANDS",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ DISTRICT                                           : Factor w/ 828 levels "24 PARGANAS NORTH",..: 4 32 158 176 226 285 286 314 399 413 ...
##  $ YEAR                                               : int  2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
##  $ MURDER                                             : int  101 151 101 80 82 3 182 111 162 93 ...
##  $ ATTEMPT.TO.MURDER                                  : int  60 125 57 53 67 1 88 113 85 60 ...
##  $ CULPABLE.HOMICIDE.NOT.AMOUNTING.TO.MURDER          : int  17 1 2 1 1 0 2 7 6 1 ...
##  $ RAPE                                               : int  50 23 27 20 23 0 54 37 56 47 ...
##  $ CUSTODIAL.RAPE                                     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ OTHER.RAPE                                         : int  50 23 27 20 23 0 54 37 56 47 ...
##  $ KIDNAPPING...ABDUCTION                             : int  46 53 59 25 49 0 82 80 67 41 ...
##  $ KIDNAPPING.AND.ABDUCTION.OF.WOMEN.AND.GIRLS        : int  30 30 34 20 26 0 51 39 49 30 ...
##  $ KIDNAPPING.AND.ABDUCTION.OF.OTHERS                 : int  16 23 25 5 23 0 31 41 18 11 ...
##  $ DACOITY                                            : int  9 8 4 1 4 5 16 13 27 1 ...
##  $ PREPARATION.AND.ASSEMBLY.FOR.DACOITY               : int  0 0 0 0 0 0 3 0 1 0 ...
##  $ ROBBERY                                            : int  41 16 14 4 25 2 59 67 50 13 ...
##  $ BURGLARY                                           : int  198 191 237 98 437 0 338 1155 218 172 ...
##  $ THEFT                                              : int  199 366 723 173 1021 162 1122 2792 392 368 ...
##  $ AUTO.THEFT                                         : int  22 57 164 36 150 0 171 1128 54 34 ...
##  $ OTHER.THEFT                                        : int  177 309 559 137 871 162 951 1664 338 334 ...
##  $ RIOTS                                              : int  78 168 156 164 70 1 244 65 220 153 ...
##  $ CRIMINAL.BREACH.OF.TRUST                           : int  16 11 33 12 50 0 67 101 25 35 ...
##  $ CHEATING                                           : int  104 65 209 37 220 0 300 1293 243 130 ...
##  $ COUNTERFIETING                                     : int  1 8 9 2 3 3 8 24 5 5 ...
##  $ ARSON                                              : int  30 69 38 23 41 0 43 0 33 73 ...
##  $ HURT.GREVIOUS.HURT                                 : int  1131 1543 2088 795 1244 1 1792 3137 1392 1026 ...
##  $ DOWRY.DEATHS                                       : int  16 7 14 17 12 0 7 24 62 17 ...
##  $ ASSAULT.ON.WOMEN.WITH.INTENT.TO.OUTRAGE.HER.MODESTY: int  149 118 112 126 109 1 139 118 414 180 ...
##  $ INSULT.TO.MODESTY.OF.WOMEN                         : int  34 24 83 38 58 0 129 27 81 336 ...
##  $ CRUELTY.BY.HUSBAND.OR.HIS.RELATIVES                : int  175 154 186 57 247 0 378 746 224 172 ...
##  $ IMPORTATION.OF.GIRLS.FROM.FOREIGN.COUNTRIES        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CAUSING.DEATH.BY.NEGLIGENCE                        : int  181 270 404 233 431 4 369 409 322 209 ...
##  $ OTHER.IPC.CRIMES                                   : int  1518 754 1262 1181 2313 104 2426 1512 1726 1450 ...
##  $ TOTAL.IPC.CRIMES                                   : int  4154 4125 5818 3140 6507 287 7848 11831 5811 4582 ...
# Total IPC crimes per state and year, as a long data frame with one row for
# every state/year combination (ordered by state, then year).
year <- 2001:2013
states <- levels(data$STATE.UT)

# Sum TOTAL.IPC.CRIMES in a single pass: rows are years, columns are states.
# `default = 0` makes a state/year pair with no rows count as 0, which is what
# sum() over an empty selection returns.
totals <- tapply(
  data$TOTAL.IPC.CRIMES,
  list(factor(data$YEAR, levels = year), data$STATE.UT),
  sum,
  default = 0
)

# as.vector() walks the matrix column by column, i.e. all years of the first
# state, then all years of the second, matching the rep() calls below.
# No placeholder row is needed, so STATE.UT carries only real state levels.
df <- data.frame(
  STATE.UT = factor(rep(states, each = length(year)), levels = states),
  YEAR = as.numeric(rep(year, times = length(states))),
  TOTAL.IPC.CRIMES = as.numeric(as.vector(totals))
)
str(df)
## 'data.frame':    468 obs. of  3 variables:
##  $ STATE.UT        : Factor w/ 37 levels "dummy","A & N ISLANDS",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ YEAR            : num  2001 2002 2003 2004 2005 ...
##  $ TOTAL.IPC.CRIMES: num  1316 1216 1288 1496 1364 ...
#for(i in 1:36) {
#  ts <- ts(df[df$STATE.UT==states[i],]$TOTAL.IPC.CRIMES,start=2001, frequency = 12)
#  hc <- hchart(ts, name = "Crimes") %>% 
#  hc_add_theme(hc_theme_darkunica()) %>%
#  hc_credits(enabled = TRUE, text = "Sources: DATA.WORLD", style = list(fontSize = "12px")) %>%
#  hc_title(text = states[i]) %>%
#  hc_legend(enabled = TRUE) 
#  print(hc)%>% hw_grid(rowheight=250,ncol=2)
#}

# Draw one small line chart per state showing its yearly total IPC crimes,
# then lay the charts out in a 3-column grid.
lapply(states, function(x) {
  state_totals <- df[df$STATE.UT == x, ]$TOTAL.IPC.CRIMES
  # df holds one value per year, so the series is annual: frequency = 1 makes
  # the time axis advance one year per observation starting at 2001
  # (frequency = 12 would treat the values as consecutive months).
  ts(state_totals, start = 2001, frequency = 1) %>%
    hchart(showInLegend = FALSE) %>%
    hc_add_theme(hc_theme_smpl()) %>%
    hc_title(text = x) %>%
    hc_yAxis(title = list(text = ""))
}) %>%
  hw_grid(rowheight = 225, ncol = 3)
# Horizontal bar chart comparing each state's total IPC crimes in 2001 and 2013.
hc_opts <- list()
hc_opts$chart <- list(type = "bar")
# Highcharts reads the chart title from `title$text`; any other key is ignored.
# NOTE(review): no stacking option is set below, so the bars are drawn side by
# side - confirm whether the "Stacked bar" wording is still wanted.
hc_opts$title <- list(text = "Stacked bar")
hc_opts$xAxis <- list(categories = states)
hc_opts$yAxis <- list(min = 0, title = list(text = 'Crime Increase from 2001 to 2013'))
hc_opts$legend <- list(reversed = TRUE)
# Column 3 of df is TOTAL.IPC.CRIMES; one series per year being compared.
hc_opts$series <- list(list(name = "2001", data = df[df$YEAR == 2001, 3]),
                       list(name = "2013", data = df[df$YEAR == 2013, 3]))

highchart(hc_opts, theme = hc_theme_sandsignika())
# Draw the 2001-vs-2013 comparison as two charts so that low-volume states are
# not flattened by the scale of the high-volume ones.
#
# The split is made per STATE, not per row: a state belongs to the high group
# when its total exceeds 30000 in at least one year. Both of a state's bars
# (2001 and 2013) therefore land in the same chart, and the category labels
# always line up with the series values.
state_names <- as.character(df$STATE.UT)
peak_total <- tapply(df$TOTAL.IPC.CRIMES, state_names, max)
high_states <- names(peak_total)[!is.na(peak_total) & peak_total > 30000]
df1 <- df[state_names %in% high_states, ]
df2 <- df[!state_names %in% high_states, ]

hc_opts <- list()
hc_opts$chart <- list(type = "bar")
# Highcharts reads the chart title from `title$text`; any other key is ignored.
# NOTE(review): no stacking option is set, so the bars are drawn side by side -
# confirm whether the "Stacked bar" wording is still wanted.
hc_opts$title <- list(text = "Stacked bar")
hc_opts$yAxis <- list(min = 0, title = list(text = 'Crime Increase from 2001 to 2013'))
hc_opts$legend <- list(reversed = TRUE)

# High-volume states. Column 3 of df is TOTAL.IPC.CRIMES.
hc_opts$xAxis <- list(categories = as.character(unique(df1$STATE.UT)))
hc_opts$series <- list(list(name = "2001", data = df1[df1$YEAR == 2001, 3]),
                       list(name = "2013", data = df1[df1$YEAR == 2013, 3]))
highchart(hc_opts, theme = hc_theme_sandsignika())

# Remaining (low-volume) states, same layout.
hc_opts$xAxis <- list(categories = as.character(unique(df2$STATE.UT)))
hc_opts$series <- list(list(name = "2001", data = df2[df2$YEAR == 2001, 3]),
                       list(name = "2013", data = df2[df2$YEAR == 2013, 3]))
highchart(hc_opts, theme = hc_theme_sandsignika())