library(readr)
Crime_data_2021 <- read_csv("~/Documents/OneDrive/2_Apple/YPS_Crime_records/28_11_22/Crime_data_2021.csv")
Rows: 36 Columns: 8── Column specification ─────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): State
dbl (7): Proh_IR_2021, Excie_IR_2021, NDPS_IR_2021, Poss_consup_2021, liq_nar_tot_2021, p...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(Crime_data_2021)
spc_tbl_ [36 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ State           : chr [1:36] "Andhra Pradesh" "Arunachal Pradesh" "Assam" "Bihar" ...
 $ Proh_IR_2021    : num [1:36] 20.3 0 0 55.9 0 ...
 $ Excie_IR_2021   : num [1:36] 31.9 0 0.5 0 83.3 0 0 35.6 40.1 1.9 ...
 $ NDPS_IR_2021    : num [1:36] 3.1 17.2 6.5 1.2 3.8 7.8 0.7 9.3 20.8 1.6 ...
 $ Poss_consup_2021: num [1:36] 1 14.2 3.6 1 0.1 3.7 0.3 5.4 6 0.9 ...
 $ liq_nar_tot_2021: num [1:36] 55.3 17.2 7 57.1 87.1 ...
 $ poss_traff_2021 : num [1:36] 2.1 2.9 2.9 0.2 3.7 4.1 0.4 3.9 14.7 0.7 ...
 $ COTPA_IR_2021   : num [1:36] 0.8 0 0 0 0.1 0 0.1 0 0 0 ...
 - attr(*, "spec")=
  .. cols(
  ..   State = col_character(),
  ..   Proh_IR_2021 = col_double(),
  ..   Excie_IR_2021 = col_double(),
  ..   NDPS_IR_2021 = col_double(),
  ..   Poss_consup_2021 = col_double(),
  ..   liq_nar_tot_2021 = col_double(),
  ..   poss_traff_2021 = col_double(),
  ..   COTPA_IR_2021 = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 
Crime_data_2021$State <- as.factor(Crime_data_2021$State)
Crime_data_2021$Proh_IR_2021 <- as.numeric(scale(Crime_data_2021$Proh_IR_2021))
Crime_data_2021$Excie_IR_2021 <- as.numeric(scale(Crime_data_2021$Excie_IR_2021))
Crime_data_2021$NDPS_IR_2021 <- as.numeric(scale(Crime_data_2021$NDPS_IR_2021))
Crime_data_2021$Poss_consup_2021 <- as.numeric(scale(Crime_data_2021$Poss_consup_2021))
Crime_data_2021$liq_nar_tot_2021 <- as.numeric(scale(Crime_data_2021$liq_nar_tot_2021))
Crime_data_2021$poss_traff_2021 <- as.numeric(scale(Crime_data_2021$poss_traff_2021))
Crime_data_2021$COTPA_IR_2021 <- as.numeric(scale(Crime_data_2021$COTPA_IR_2021))
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ──────────────────────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.4.0     ✔ purrr   0.3.5
✔ tibble  3.1.8     ✔ stringr 1.4.1
✔ tidyr   1.2.1     ✔ forcats 0.5.2── Conflicts ─────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
head(Crime_data_2021)
library(cluster)
library(factoextra)
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
gowerDist <- daisy(Crime_data_2021[,-1],"gower")
gowerMat <- as.matrix(gowerDist)
sil_width <- c(NA)
for(i in 2:8){
pam_fit <- pam(gowerMat, diss = TRUE, k = i)
sil_width[i] <- pam_fit$silinfo$avg.width
 }
plot(1:8, sil_width,
  xlab = "Number of clusters",
  ylab = "Silhouette Width")
 lines(1:8, sil_width)

pam_fit <- pam(gowerMat, diss = TRUE,3) 
summary(pam_fit)
Medoids:
     ID       
[1,] "32" "32"
[2,] "2"  "2" 
[3,] "22" "22"
Clustering vector:
 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 
 1  2  3  1  1  3  1  3  3  1  3  2  1  3  3  1  3  3  1  2  1  3  3  1  3  1  2  1  3  3  1 
32 33 34 35 36 
 1  3  1  3  3 
Objective function:
     build       swap 
0.08342920 0.08184296 

Numerical information per cluster:
     size  max_diss    av_diss  diameter separation
[1,]   15 0.3204584 0.06826963 0.3926393 0.02000334
[2,]    4 0.2388511 0.09607152 0.2719897 0.13041115
[3,]   17 0.3779238 0.09047153 0.4963254 0.02000334

Isolated clusters:
 L-clusters: character(0)
 L*-clusters: character(0)

Silhouette plot information:
   cluster neighbor   sil_width
34       1        3  0.47682987
28       1        3  0.46919464
32       1        3  0.46527594
10       1        3  0.46029283
1        1        3  0.44708745
16       1        3  0.44694964
31       1        3  0.44138436
4        1        3  0.41631698
19       1        3  0.40092922
21       1        3  0.39094214
24       1        3  0.33919467
5        1        3  0.31848310
26       1        3  0.29602525
7        1        3  0.17472446
13       1        3  0.17144161
12       2        3  0.42230697
27       2        3  0.41766123
2        2        3  0.37448868
20       2        3  0.33811801
15       3        1  0.30387898
17       3        1  0.27314786
33       3        1  0.20113356
6        3        1  0.19811762
22       3        1  0.18958563
11       3        1  0.18062462
8        3        1  0.17715354
14       3        1  0.12141911
18       3        1  0.11143787
23       3        1  0.07305647
3        3        1  0.04861071
25       3        1  0.01353958
29       3        1 -0.03891492
9        3        2 -0.05210286
30       3        1 -0.09861406
36       3        1 -0.20392315
35       3        1 -0.25384312
Average silhouette width per cluster:
[1] 0.38100481 0.38814372 0.07319455
Average silhouette width of total data set:
[1] 0.2364432

Available components:
[1] "medoids"    "id.med"     "clustering" "objective"  "isolation"  "clusinfo"  
[7] "silinfo"    "diss"       "call"      
plot(pam_fit)

library(Rtsne)
tsne_obj <- Rtsne(gowerMat, is_distance = TRUE,perplexity = 1)
 tsne_data <- tsne_obj$Y %>%
      data.frame() %>%
      setNames(c("X", "Y")) %>%
      mutate(cluster = factor(pam_fit$clustering))
  ggplot(aes(x = X, y = Y), data = tsne_data) +
      geom_point(aes(color = cluster))

Crime_data_2021$cluster <- pam_fit$clustering
 
tab1 <- Crime_data_2021 %>%
  group_by(cluster)%>%
  summarise(across(c(2:8),mean))
tab2 <- Crime_data_2021 %>%
  group_by(cluster)%>%
  summarise("NumOfStates"= n(),
            "States"= paste0(State,collapse = ", "))

tableSummary <- left_join(tab1,tab2)
Joining, by = "cluster"
tableSummary$medioids <- Crime_data_2021$State[pam_fit$id.med]
tableSummary <- t(tableSummary)
library(kableExtra)

Attaching package: ‘kableExtra’

The following object is masked from ‘package:dplyr’:

    group_rows
kable(tableSummary,digits = 2)
cluster 1 2 3
Proh_IR_2021 0.08482629 -0.20999478 -0.02543619
Excie_IR_2021 0.12991545 -0.04127842 -0.10491871
NDPS_IR_2021 -0.7483711 1.9570694 0.1998405
Poss_consup_2021 -0.70149178 2.40934822 0.05205787
liq_nar_tot_2021 0.08520923 -0.06847944 -0.05907180
poss_traff_2021 -0.5007927 0.7247221 0.2713530
COTPA_IR_2021 -0.1988977 0.1326896 0.1442768
NumOfStates 15 4 17
States Andhra Pradesh, Bihar, Chhattisgarh, Gujarat, Jharkhand, Madhya Pradesh, Meghalaya, Odisha, Rajasthan, Telangana, Uttar Pradesh, West Bengal, D&N Haveli and Daman & Diu, Delhi, Ladakh Arunachal Pradesh, Kerala, Punjab, Uttarakhand Assam, Goa, Haryana, Himachal Pradesh, Karnataka, Maharashtra, Manipur, Mizoram, Nagaland, Sikkim, Tamil Nadu, Tripura, A&N Islands, Chandigarh, Jammu & Kashmir, Lakshadweep, Puducherry
medioids Delhi Arunachal Pradesh Sikkim
LS0tCnRpdGxlOiAiQ2x1c3RlciBBbmFseXNpcyBvZiBOQ1JCIDIwMjEiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCgoKYGBge3J9CmxpYnJhcnkocmVhZHIpCkNyaW1lX2RhdGFfMjAyMSA8LSByZWFkX2Nzdigifi9Eb2N1bWVudHMvT25lRHJpdmUvMl9BcHBsZS9ZUFNfQ3JpbWVfcmVjb3Jkcy8yOF8xMV8yMi9DcmltZV9kYXRhXzIwMjEuY3N2IikKc3RyKENyaW1lX2RhdGFfMjAyMSkKYGBgCgpgYGB7cn0KQ3JpbWVfZGF0YV8yMDIxJFN0YXRlIDwtIGFzLmZhY3RvcihDcmltZV9kYXRhXzIwMjEkU3RhdGUpCkNyaW1lX2RhdGFfMjAyMSRQcm9oX0lSXzIwMjEgPC0gYXMubnVtZXJpYyhzY2FsZShDcmltZV9kYXRhXzIwMjEkUHJvaF9JUl8yMDIxKSkKQ3JpbWVfZGF0YV8yMDIxJEV4Y2llX0lSXzIwMjEgPC0gYXMubnVtZXJpYyhzY2FsZShDcmltZV9kYXRhXzIwMjEkRXhjaWVfSVJfMjAyMSkpCkNyaW1lX2RhdGFfMjAyMSRORFBTX0lSXzIwMjEgPC0gYXMubnVtZXJpYyhzY2FsZShDcmltZV9kYXRhXzIwMjEkTkRQU19JUl8yMDIxKSkKQ3JpbWVfZGF0YV8yMDIxJFBvc3NfY29uc3VwXzIwMjEgPC0gYXMubnVtZXJpYyhzY2FsZShDcmltZV9kYXRhXzIwMjEkUG9zc19jb25zdXBfMjAyMSkpCkNyaW1lX2RhdGFfMjAyMSRsaXFfbmFyX3RvdF8yMDIxIDwtIGFzLm51bWVyaWMoc2NhbGUoQ3JpbWVfZGF0YV8yMDIxJGxpcV9uYXJfdG90XzIwMjEpKQpDcmltZV9kYXRhXzIwMjEkcG9zc190cmFmZl8yMDIxIDwtIGFzLm51bWVyaWMoc2NhbGUoQ3JpbWVfZGF0YV8yMDIxJHBvc3NfdHJhZmZfMjAyMSkpCkNyaW1lX2RhdGFfMjAyMSRDT1RQQV9JUl8yMDIxIDwtIGFzLm51bWVyaWMoc2NhbGUoQ3JpbWVfZGF0YV8yMDIxJENPVFBBX0lSXzIwMjEpKQoKYGBgCgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCmhlYWQoQ3JpbWVfZGF0YV8yMDIxKQpsaWJyYXJ5KGNsdXN0ZXIpCmxpYnJhcnkoZmFjdG9leHRyYSkKZ293ZXJEaXN0IDwtIGRhaXN5KENyaW1lX2RhdGFfMjAyMVssLTFdLCJnb3dlciIpCmdvd2VyTWF0IDwtIGFzLm1hdHJpeChnb3dlckRpc3QpCnNpbF93aWR0aCA8LSBjKE5BKQpmb3IoaSBpbiAyOjgpewpwYW1fZml0IDwtIHBhbShnb3dlck1hdCwgZGlzcyA9IFRSVUUsIGsgPSBpKQpzaWxfd2lkdGhbaV0gPC0gcGFtX2ZpdCRzaWxpbmZvJGF2Zy53aWR0aAogfQpwbG90KDE6OCwgc2lsX3dpZHRoLAogIHhsYWIgPSAiTnVtYmVyIG9mIGNsdXN0ZXJzIiwKICB5bGFiID0gIlNpbGhvdWV0dGUgV2lkdGgiKQogbGluZXMoMTo4LCBzaWxfd2lkdGgpCmBgYApgYGB7cn0KcGFtX2ZpdCA8LSBwYW0oZ293ZXJNYXQsIGRpc3MgPSBUUlVFLDMpIApzdW1tYXJ5KHBhbV9maXQpCmBgYApgYGB7cn0KcGxvdChwYW1fZml0KQpgYGAKYGBge3J9CmxpYnJhcnkoUnRzbmUpCnRzbmVfb2JqIDwtIFJ0c25lKGdvd2VyTWF0LCBpc19kaXN0YW5jZSA9IFRSVUUscGVycGxleGl0eSA9IDEpCiB0c25lX2RhdGEgPC0gdHNuZV9vYmokWSAlPiUKICAgICAgZGF0YS5mcmFtZSgpICU+JQogICAgICBzZXROYW1lcyhjKCJYIiwgIlkiKSkgJT4lCiAgICAgIG11dGF0ZShjbHVzdGVyID0gZmFjdG9yKHBhbV9maXQkY2x1c3RlcmluZykpCiAgZ2dwbG90KGFlcyh4ID0gWCwgeSA9IFkpLCBkYXRhID0gdHNuZV9kYXRhKSArCiAgICAgIGdlb21fcG9pbnQoYWVzKGNvbG9yID0gY2x1c3RlcikpCmBgYApgYGB7cn0KQ3JpbWVfZGF0YV8yMDIxJGNsdXN0ZXIgPC0gcGFtX2ZpdCRjbHVzdGVyaW5nCiAKdGFiMSA8LSBDcmltZV9kYXRhXzIwMjEgJT4lCiAgZ3JvdXBfYnkoY2x1c3RlciklPiUKICBzdW1tYXJpc2UoYWNyb3NzKGMoMjo4KSxtZWFuKSkKdGFiMiA8LSBDcmltZV9kYXRhXzIwMjEgJT4lCiAgZ3JvdXBfYnkoY2x1c3RlciklPiUKICBzdW1tYXJpc2UoIk51bU9mU3RhdGVzIj0gbigpLAogICAgICAgICAgICAiU3RhdGVzIj0gcGFzdGUwKFN0YXRlLGNvbGxhcHNlID0gIiwgIikpCgp0YWJsZVN1bW1hcnkgPC0gbGVmdF9qb2luKHRhYjEsdGFiMikKdGFibGVTdW1tYXJ5JG1lZGlvaWRzIDwtIENyaW1lX2RhdGFfMjAyMSRTdGF0ZVtwYW1fZml0JGlkLm1lZF0KdGFibGVTdW1tYXJ5IDwtIHQodGFibGVTdW1tYXJ5KQpsaWJyYXJ5KGthYmxlRXh0cmEpCmthYmxlKHRhYmxlU3VtbWFyeSxkaWdpdHMgPSAyKQpgYGAKCg==