Required packages

library(tidyr)
library(dplyr)
library(readr)
library(outliers)
library(forecast)

Executive Summary

Data

Caste <- read_csv("Caste.csv")
Parsed with column specification:
cols(
  state_name = col_character(),
  is_state = col_integer(),
  year = col_integer(),
  gender = col_character(),
  caste = col_character(),
  convicts = col_integer(),
  under_trial = col_integer(),
  detenues = col_integer(),
  others = col_integer()
)
head(Caste)
Death_sentence <- read_csv("Death_sentence.csv")
Parsed with column specification:
cols(
  state_name = col_character(),
  year = col_integer(),
  no_capital_punishment = col_integer(),
  no_life_imprisonment = col_integer(),
  no_executed = col_integer()
)
head(Death_sentence)
# Join the two tables on their shared keys. Naming the keys explicitly keeps
# the join stable if either table later gains another same-named column, and
# avoids dplyr having to guess them (and print a "Joining, by = ..." message).
a <- inner_join(Caste, Death_sentence, by = c("state_name", "year"))
head(a)

Understand

# Recode the categorical columns, then report the table's dimensions.
# is_state: integer flag -> logical.
a[["is_state"]] <- as.logical(a[["is_state"]])
# gender: unordered factor, levels taken from the data.
a[["gender"]] <- factor(a[["gender"]])
# caste: ordered factor; the raw "Others" level is relabelled "Higher_caste".
a[["caste"]] <- ordered(
  a[["caste"]],
  levels = c("ST", "SC", "OBC", "Others"),
  labels = c("ST", "SC", "OBC", "Higher_caste")
)
dim(a)
[1] 3560   12
str(a)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   3560 obs. of  12 variables:
 $ state_name           : chr  "Andhra Pradesh" "Andhra Pradesh" "Andhra Pradesh" "Andhra Pradesh" ...
 $ is_state             : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
 $ year                 : int  2001 2001 2001 2001 2002 2002 2002 2002 2003 2003 ...
 $ gender               : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
 $ caste                : Ord.factor w/ 4 levels "ST"<"SC"<"OBC"<..: 2 1 3 4 2 1 3 4 2 1 ...
 $ convicts             : int  1236 418 1526 939 1177 384 1833 1327 1153 381 ...
 $ under_trial          : int  1836 1205 3359 1960 1573 962 2951 2506 1622 990 ...
 $ detenues             : int  0 0 4 0 2 0 0 0 0 0 ...
 $ others               : int  5 1 8 8 0 0 0 0 0 0 ...
 $ no_capital_punishment: int  1 1 1 1 3 3 3 3 1 1 ...
 $ no_life_imprisonment : int  1 1 1 1 1 1 1 1 1 1 ...
 $ no_executed          : int  0 0 0 0 0 0 0 0 0 0 ...
attributes(a)
$`row.names`
   [1]    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17
  [18]   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   33   34
  [35]   35   36   37   38   39   40   41   42   43   44   45   46   47   48   49   50   51
  [52]   52   53   54   55   56   57   58   59   60   61   62   63   64   65   66   67   68
  [69]   69   70   71   72   73   74   75   76   77   78   79   80   81   82   83   84   85
  [86]   86   87   88   89   90   91   92   93   94   95   96   97   98   99  100  101  102
 [103]  103  104  105  106  107  108  109  110  111  112  113  114  115  116  117  118  119
 [120]  120  121  122  123  124  125  126  127  128  129  130  131  132  133  134  135  136
 [137]  137  138  139  140  141  142  143  144  145  146  147  148  149  150  151  152  153
 [154]  154  155  156  157  158  159  160  161  162  163  164  165  166  167  168  169  170
 [171]  171  172  173  174  175  176  177  178  179  180  181  182  183  184  185  186  187
 [188]  188  189  190  191  192  193  194  195  196  197  198  199  200  201  202  203  204
 [205]  205  206  207  208  209  210  211  212  213  214  215  216  217  218  219  220  221
 [222]  222  223  224  225  226  227  228  229  230  231  232  233  234  235  236  237  238
 [239]  239  240  241  242  243  244  245  246  247  248  249  250  251  252  253  254  255
 [256]  256  257  258  259  260  261  262  263  264  265  266  267  268  269  270  271  272
 [273]  273  274  275  276  277  278  279  280  281  282  283  284  285  286  287  288  289
 [290]  290  291  292  293  294  295  296  297  298  299  300  301  302  303  304  305  306
 [307]  307  308  309  310  311  312  313  314  315  316  317  318  319  320  321  322  323
 [324]  324  325  326  327  328  329  330  331  332  333  334  335  336  337  338  339  340
 [341]  341  342  343  344  345  346  347  348  349  350  351  352  353  354  355  356  357
 [358]  358  359  360  361  362  363  364  365  366  367  368  369  370  371  372  373  374
 [375]  375  376  377  378  379  380  381  382  383  384  385  386  387  388  389  390  391
 [392]  392  393  394  395  396  397  398  399  400  401  402  403  404  405  406  407  408
 [409]  409  410  411  412  413  414  415  416  417  418  419  420  421  422  423  424  425
 [426]  426  427  428  429  430  431  432  433  434  435  436  437  438  439  440  441  442
 [443]  443  444  445  446  447  448  449  450  451  452  453  454  455  456  457  458  459
 [460]  460  461  462  463  464  465  466  467  468  469  470  471  472  473  474  475  476
 [477]  477  478  479  480  481  482  483  484  485  486  487  488  489  490  491  492  493
 [494]  494  495  496  497  498  499  500  501  502  503  504  505  506  507  508  509  510
 [511]  511  512  513  514  515  516  517  518  519  520  521  522  523  524  525  526  527
 [528]  528  529  530  531  532  533  534  535  536  537  538  539  540  541  542  543  544
 [545]  545  546  547  548  549  550  551  552  553  554  555  556  557  558  559  560  561
 [562]  562  563  564  565  566  567  568  569  570  571  572  573  574  575  576  577  578
 [579]  579  580  581  582  583  584  585  586  587  588  589  590  591  592  593  594  595
 [596]  596  597  598  599  600  601  602  603  604  605  606  607  608  609  610  611  612
 [613]  613  614  615  616  617  618  619  620  621  622  623  624  625  626  627  628  629
 [630]  630  631  632  633  634  635  636  637  638  639  640  641  642  643  644  645  646
 [647]  647  648  649  650  651  652  653  654  655  656  657  658  659  660  661  662  663
 [664]  664  665  666  667  668  669  670  671  672  673  674  675  676  677  678  679  680
 [681]  681  682  683  684  685  686  687  688  689  690  691  692  693  694  695  696  697
 [698]  698  699  700  701  702  703  704  705  706  707  708  709  710  711  712  713  714
 [715]  715  716  717  718  719  720  721  722  723  724  725  726  727  728  729  730  731
 [732]  732  733  734  735  736  737  738  739  740  741  742  743  744  745  746  747  748
 [749]  749  750  751  752  753  754  755  756  757  758  759  760  761  762  763  764  765
 [766]  766  767  768  769  770  771  772  773  774  775  776  777  778  779  780  781  782
 [783]  783  784  785  786  787  788  789  790  791  792  793  794  795  796  797  798  799
 [800]  800  801  802  803  804  805  806  807  808  809  810  811  812  813  814  815  816
 [817]  817  818  819  820  821  822  823  824  825  826  827  828  829  830  831  832  833
 [834]  834  835  836  837  838  839  840  841  842  843  844  845  846  847  848  849  850
 [851]  851  852  853  854  855  856  857  858  859  860  861  862  863  864  865  866  867
 [868]  868  869  870  871  872  873  874  875  876  877  878  879  880  881  882  883  884
 [885]  885  886  887  888  889  890  891  892  893  894  895  896  897  898  899  900  901
 [902]  902  903  904  905  906  907  908  909  910  911  912  913  914  915  916  917  918
 [919]  919  920  921  922  923  924  925  926  927  928  929  930  931  932  933  934  935
 [936]  936  937  938  939  940  941  942  943  944  945  946  947  948  949  950  951  952
 [953]  953  954  955  956  957  958  959  960  961  962  963  964  965  966  967  968  969
 [970]  970  971  972  973  974  975  976  977  978  979  980  981  982  983  984  985  986
 [987]  987  988  989  990  991  992  993  994  995  996  997  998  999 1000
 [ reached getOption("max.print") -- omitted 2560 entries ]

$names
 [1] "state_name"            "is_state"              "year"                 
 [4] "gender"                "caste"                 "convicts"             
 [7] "under_trial"           "detenues"              "others"               
[10] "no_capital_punishment" "no_life_imprisonment"  "no_executed"          

$class
[1] "tbl_df"     "tbl"        "data.frame"
summary(a)
  state_name         is_state            year         gender              caste    
 Length:3560        Mode :logical   Min.   :2001   Female:1780   ST          :890  
 Class :character   FALSE:720       1st Qu.:2004   Male  :1780   SC          :890  
 Mode  :character   TRUE :2840      Median :2007                 OBC         :890  
                                    Mean   :2007                 Higher_caste:890  
                                    3rd Qu.:2010                                   
                                    Max.   :2013                                   
    convicts       under_trial         detenues          others       
 Min.   :   0.0   Min.   :    0.0   Min.   :  0.00   Min.   :   0.00  
 1st Qu.:   1.0   1st Qu.:    3.0   1st Qu.:  0.00   1st Qu.:   0.00  
 Median :  26.0   Median :   65.5   Median :  0.00   Median :   0.00  
 Mean   : 400.4   Mean   :  850.4   Mean   : 11.15   Mean   :  20.89  
 3rd Qu.: 289.0   3rd Qu.:  517.2   3rd Qu.:  0.00   3rd Qu.:   0.00  
 Max.   :9836.0   Max.   :21341.0   Max.   :925.00   Max.   :6039.00  
 no_capital_punishment no_life_imprisonment  no_executed      
 Min.   : 0.000        Min.   :  0.00       Min.   :0.000000  
 1st Qu.: 0.000        1st Qu.:  0.00       1st Qu.:0.000000  
 Median : 0.000        Median :  0.00       Median :0.000000  
 Mean   : 3.787        Mean   : 10.11       Mean   :0.006742  
 3rd Qu.: 4.000        3rd Qu.:  2.00       3rd Qu.:0.000000  
 Max.   :57.000        Max.   :919.00       Max.   :1.000000  

Tidy & Manipulate Data I

head(a)

Tidy & Manipulate Data II

# Add Total_punishments: capital punishments plus life imprisonments per row.
a <- a %>%
  mutate(Total_punishments = no_capital_punishment + no_life_imprisonment)
head(a)

Scan I

sum(is.na(a))
[1] 0
sum(is.nan(a$convicts))
[1] 0
sum(is.nan(a$under_trial))
[1] 0
sum(is.nan(a$detenues))
[1] 0
sum(is.nan(a$others))
[1] 0
sum(is.nan(a$no_capital_punishment))
[1] 0
sum(is.nan(a$no_life_imprisonment))
[1] 0
sum(is.nan(a$no_executed))
[1] 0
sum(is.nan(a$Total_punishments))
[1] 0

Scan II

Before Capping

# Draw one boxplot per numeric column of `a`; each plot is titled
# "BoxPlot of <column name>". invisible() suppresses the list of boxplot
# statistics that lapply() would otherwise print.
invisible(lapply(
  c(
    "convicts", "under_trial", "detenues", "others",
    "no_capital_punishment", "no_life_imprisonment",
    "no_executed", "Total_punishments"
  ),
  function(column) {
    boxplot(a[[column]], main = paste("BoxPlot of", column))
  }
))

# Cap the outliers of a numeric vector using the 1.5 * IQR fences.
#
# Values below Q1 - 1.5 * IQR are replaced by the 5th percentile and values
# above Q3 + 1.5 * IQR are replaced by the 95th percentile; every other value
# is returned unchanged.
#
# Args:
#   x: numeric vector. Missing values are ignored when the percentiles and
#      the IQR are computed and are left as NA in the result.
#
# Returns:
#   A vector of the same length as `x` with the outliers replaced. Integer
#   input comes back as double because the replacement percentiles are
#   doubles.
cap <- function(x) {
  # na.rm = TRUE: quantile() and IQR() stop with an error on NA otherwise.
  quantiles <- quantile(x, c(0.05, 0.25, 0.75, 0.95), na.rm = TRUE)
  # Compute the fence width once, from the untouched data, so that the upper
  # fence is not affected by the replacement of the lower tail.
  fence <- 1.5 * IQR(x, na.rm = TRUE)
  # which() drops NA comparisons, so missing values are never overwritten.
  x[which(x < quantiles[2] - fence)] <- quantiles[1]
  x[which(x > quantiles[3] + fence)] <- quantiles[4]
  x
}
# Keep only the numeric columns that are going to be checked for outliers.
a_sub <- dplyr::select(
  a,
  convicts, under_trial, detenues, others,
  no_capital_punishment, no_life_imprisonment, no_executed,
  Total_punishments
)
summary(a_sub)
    convicts       under_trial         detenues          others       
 Min.   :   0.0   Min.   :    0.0   Min.   :  0.00   Min.   :   0.00  
 1st Qu.:   1.0   1st Qu.:    3.0   1st Qu.:  0.00   1st Qu.:   0.00  
 Median :  26.0   Median :   65.5   Median :  0.00   Median :   0.00  
 Mean   : 400.4   Mean   :  850.4   Mean   : 11.15   Mean   :  20.89  
 3rd Qu.: 289.0   3rd Qu.:  517.2   3rd Qu.:  0.00   3rd Qu.:   0.00  
 Max.   :9836.0   Max.   :21341.0   Max.   :925.00   Max.   :6039.00  
 no_capital_punishment no_life_imprisonment  no_executed       Total_punishments
 Min.   : 0.000        Min.   :  0.00       Min.   :0.000000   Min.   :  0.00   
 1st Qu.: 0.000        1st Qu.:  0.00       1st Qu.:0.000000   1st Qu.:  0.00   
 Median : 0.000        Median :  0.00       Median :0.000000   Median :  1.00   
 Mean   : 3.787        Mean   : 10.11       Mean   :0.006742   Mean   : 13.89   
 3rd Qu.: 4.000        3rd Qu.:  2.00       3rd Qu.:0.000000   3rd Qu.:  8.00   
 Max.   :57.000        Max.   :919.00       Max.   :1.000000   Max.   :928.00   
# Apply cap() to every column of a_sub. vapply() is used instead of sapply()
# because its result shape is fixed up front: a numeric matrix with one row
# per observation and one column per variable. It raises an error if cap()
# ever returns a vector of a different length or type, where sapply() would
# silently fall back to a list.
a_cap <- vapply(a_sub, cap, FUN.VALUE = numeric(nrow(a_sub)))
summary(a_cap)
    convicts       under_trial        detenues         others       no_capital_punishment
 Min.   :   0.0   Min.   :   0.0   Min.   : 0.00   Min.   : 0.000   Min.   : 0.000       
 1st Qu.:   1.0   1st Qu.:   3.0   1st Qu.: 0.00   1st Qu.: 0.000   1st Qu.: 0.000       
 Median :  26.0   Median :  65.5   Median : 0.00   Median : 0.000   Median : 0.000       
 Mean   : 437.1   Mean   : 818.5   Mean   :11.84   Mean   : 2.706   Mean   : 3.681       
 3rd Qu.: 289.0   3rd Qu.: 517.2   3rd Qu.: 0.00   3rd Qu.: 0.000   3rd Qu.: 4.000       
 Max.   :2066.2   Max.   :3875.8   Max.   :48.00   Max.   :16.000   Max.   :19.000       
 no_life_imprisonment  no_executed Total_punishments
 Min.   : 0.000       Min.   :0    Min.   : 0.000   
 1st Qu.: 0.000       1st Qu.:0    1st Qu.: 0.000   
 Median : 0.000       Median :0    Median : 1.000   
 Mean   : 3.816       Mean   :0    Mean   : 7.431   
 3rd Qu.: 2.000       3rd Qu.:0    3rd Qu.: 8.000   
 Max.   :22.000       Max.   :0    Max.   :41.000   
# Identifier columns, selected by name so the result does not depend on the
# column order of `a` (these are its first five columns).
d1 <- a[, c("state_name", "is_state", "year", "gender", "caste")]
# Put the capped numeric columns back next to the identifiers.
d2 <- cbind(d1, a_cap)
head(d2)

After Capping

# Draw one boxplot per capped numeric column of `d2`; each plot is titled
# "BoxPlot of <column name>". invisible() suppresses the list of boxplot
# statistics that lapply() would otherwise print.
invisible(lapply(
  c(
    "convicts", "under_trial", "detenues", "others",
    "no_capital_punishment", "no_life_imprisonment",
    "no_executed", "Total_punishments"
  ),
  function(column) {
    boxplot(d2[[column]], main = paste("BoxPlot of", column))
  }
))

Transform

  • In this step, a data transformation technique is applied to the “Total_punishments” variable.

  • First, a histogram of this variable is plotted to check for skewness. The histogram is right-skewed.

  • Hence, a logarithmic transformation is applied to reduce the skewness and bring the distribution closer to normal.

  • Finally, the histogram is plotted again to show that the transformed “Total_punishments” variable is closer to a normal distribution.

# Distribution of the capped totals before the transformation.
hist(d2$Total_punishments)

# Total_punishments contains zeros, and log(0) is -Inf, which would leave
# infinite values in the column and drop those rows from the histogram.
# log1p(x) = log(1 + x) is still a logarithmic transform but maps 0 to 0.
d2$Total_punishments <- log1p(d2$Total_punishments)
hist(d2$Total_punishments)



