library(readr)
Crime_data_2021 <- read_csv("~/Documents/OneDrive/2_Apple/YPS_Crime_records/28_11_22/Crime_data_2021.csv")
Rows: 36 Columns: 8── Column specification ─────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): State
dbl (7): Proh_IR_2021, Excie_IR_2021, NDPS_IR_2021, Poss_consup_2021, liq_nar_tot_2021, p...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(Crime_data_2021)
spc_tbl_ [36 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ State : chr [1:36] "Andhra Pradesh" "Arunachal Pradesh" "Assam" "Bihar" ...
$ Proh_IR_2021 : num [1:36] 20.3 0 0 55.9 0 ...
$ Excie_IR_2021 : num [1:36] 31.9 0 0.5 0 83.3 0 0 35.6 40.1 1.9 ...
$ NDPS_IR_2021 : num [1:36] 3.1 17.2 6.5 1.2 3.8 7.8 0.7 9.3 20.8 1.6 ...
$ Poss_consup_2021: num [1:36] 1 14.2 3.6 1 0.1 3.7 0.3 5.4 6 0.9 ...
$ liq_nar_tot_2021: num [1:36] 55.3 17.2 7 57.1 87.1 ...
$ poss_traff_2021 : num [1:36] 2.1 2.9 2.9 0.2 3.7 4.1 0.4 3.9 14.7 0.7 ...
$ COTPA_IR_2021 : num [1:36] 0.8 0 0 0 0.1 0 0.1 0 0 0 ...
- attr(*, "spec")=
.. cols(
.. State = col_character(),
.. Proh_IR_2021 = col_double(),
.. Excie_IR_2021 = col_double(),
.. NDPS_IR_2021 = col_double(),
.. Poss_consup_2021 = col_double(),
.. liq_nar_tot_2021 = col_double(),
.. poss_traff_2021 = col_double(),
.. COTPA_IR_2021 = col_double()
.. )
- attr(*, "problems")=<externalptr>
Crime_data_2021$State <- as.factor(Crime_data_2021$State)
Crime_data_2021$Proh_IR_2021 <- as.numeric(scale(Crime_data_2021$Proh_IR_2021))
Crime_data_2021$Excie_IR_2021 <- as.numeric(scale(Crime_data_2021$Excie_IR_2021))
Crime_data_2021$NDPS_IR_2021 <- as.numeric(scale(Crime_data_2021$NDPS_IR_2021))
Crime_data_2021$Poss_consup_2021 <- as.numeric(scale(Crime_data_2021$Poss_consup_2021))
Crime_data_2021$liq_nar_tot_2021 <- as.numeric(scale(Crime_data_2021$liq_nar_tot_2021))
Crime_data_2021$poss_traff_2021 <- as.numeric(scale(Crime_data_2021$poss_traff_2021))
Crime_data_2021$COTPA_IR_2021 <- as.numeric(scale(Crime_data_2021$COTPA_IR_2021))
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ──────────────────────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.4.0 ✔ purrr 0.3.5
✔ tibble 3.1.8 ✔ stringr 1.4.1
✔ tidyr 1.2.1 ✔ forcats 0.5.2── Conflicts ─────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
head(Crime_data_2021)
library(cluster)
library(factoextra)
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
gowerDist <- daisy(Crime_data_2021[,-1],"gower")
gowerMat <- as.matrix(gowerDist)
sil_width <- c(NA)
for(i in 2:8){
pam_fit <- pam(gowerMat, diss = TRUE, k = i)
sil_width[i] <- pam_fit$silinfo$avg.width
}
plot(1:8, sil_width,
xlab = "Number of clusters",
ylab = "Silhouette Width")
lines(1:8, sil_width)

pam_fit <- pam(gowerMat, diss = TRUE,3)
summary(pam_fit)
Medoids:
ID
[1,] "32" "32"
[2,] "2" "2"
[3,] "22" "22"
Clustering vector:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
1 2 3 1 1 3 1 3 3 1 3 2 1 3 3 1 3 3 1 2 1 3 3 1 3 1 2 1 3 3 1
32 33 34 35 36
1 3 1 3 3
Objective function:
build swap
0.08342920 0.08184296
Numerical information per cluster:
size max_diss av_diss diameter separation
[1,] 15 0.3204584 0.06826963 0.3926393 0.02000334
[2,] 4 0.2388511 0.09607152 0.2719897 0.13041115
[3,] 17 0.3779238 0.09047153 0.4963254 0.02000334
Isolated clusters:
L-clusters: character(0)
L*-clusters: character(0)
Silhouette plot information:
cluster neighbor sil_width
34 1 3 0.47682987
28 1 3 0.46919464
32 1 3 0.46527594
10 1 3 0.46029283
1 1 3 0.44708745
16 1 3 0.44694964
31 1 3 0.44138436
4 1 3 0.41631698
19 1 3 0.40092922
21 1 3 0.39094214
24 1 3 0.33919467
5 1 3 0.31848310
26 1 3 0.29602525
7 1 3 0.17472446
13 1 3 0.17144161
12 2 3 0.42230697
27 2 3 0.41766123
2 2 3 0.37448868
20 2 3 0.33811801
15 3 1 0.30387898
17 3 1 0.27314786
33 3 1 0.20113356
6 3 1 0.19811762
22 3 1 0.18958563
11 3 1 0.18062462
8 3 1 0.17715354
14 3 1 0.12141911
18 3 1 0.11143787
23 3 1 0.07305647
3 3 1 0.04861071
25 3 1 0.01353958
29 3 1 -0.03891492
9 3 2 -0.05210286
30 3 1 -0.09861406
36 3 1 -0.20392315
35 3 1 -0.25384312
Average silhouette width per cluster:
[1] 0.38100481 0.38814372 0.07319455
Average silhouette width of total data set:
[1] 0.2364432
Available components:
[1] "medoids" "id.med" "clustering" "objective" "isolation" "clusinfo"
[7] "silinfo" "diss" "call"
plot(pam_fit)

library(Rtsne)
tsne_obj <- Rtsne(gowerMat, is_distance = TRUE,perplexity = 1)
tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(cluster = factor(pam_fit$clustering))
ggplot(aes(x = X, y = Y), data = tsne_data) +
geom_point(aes(color = cluster))

Crime_data_2021$cluster <- pam_fit$clustering
tab1 <- Crime_data_2021 %>%
group_by(cluster)%>%
summarise(across(c(2:8),mean))
tab2 <- Crime_data_2021 %>%
group_by(cluster)%>%
summarise("NumOfStates"= n(),
"States"= paste0(State,collapse = ", "))
tableSummary <- left_join(tab1,tab2)
Joining, by = "cluster"
tableSummary$medioids <- Crime_data_2021$State[pam_fit$id.med]
tableSummary <- t(tableSummary)
library(kableExtra)
Attaching package: ‘kableExtra’
The following object is masked from ‘package:dplyr’:
group_rows
kable(tableSummary,digits = 2)
|
cluster
|
1
|
2
|
3
|
|
Proh_IR_2021
|
0.08482629
|
-0.20999478
|
-0.02543619
|
|
Excie_IR_2021
|
0.12991545
|
-0.04127842
|
-0.10491871
|
|
NDPS_IR_2021
|
-0.7483711
|
1.9570694
|
0.1998405
|
|
Poss_consup_2021
|
-0.70149178
|
2.40934822
|
0.05205787
|
|
liq_nar_tot_2021
|
0.08520923
|
-0.06847944
|
-0.05907180
|
|
poss_traff_2021
|
-0.5007927
|
0.7247221
|
0.2713530
|
|
COTPA_IR_2021
|
-0.1988977
|
0.1326896
|
0.1442768
|
|
NumOfStates
|
15
|
4
|
17
|
|
States
|
Andhra Pradesh, Bihar, Chhattisgarh, Gujarat, Jharkhand, Madhya Pradesh,
Meghalaya, Odisha, Rajasthan, Telangana, Uttar Pradesh, West Bengal,
D&N Haveli and Daman & Diu, Delhi, Ladakh
|
Arunachal Pradesh, Kerala, Punjab, Uttarakhand
|
Assam, Goa, Haryana, Himachal Pradesh, Karnataka, Maharashtra, Manipur,
Mizoram, Nagaland, Sikkim, Tamil Nadu, Tripura, A&N Islands,
Chandigarh, Jammu & Kashmir, Lakshadweep, Puducherry
|
|
medioids
|
Delhi
|
Arunachal Pradesh
|
Sikkim
|
LS0tCnRpdGxlOiAiQ2x1c3RlciBBbmFseXNpcyBvZiBOQ1JCIDIwMjEiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCgoKYGBge3J9CmxpYnJhcnkocmVhZHIpCkNyaW1lX2RhdGFfMjAyMSA8LSByZWFkX2Nzdigifi9Eb2N1bWVudHMvT25lRHJpdmUvMl9BcHBsZS9ZUFNfQ3JpbWVfcmVjb3Jkcy8yOF8xMV8yMi9DcmltZV9kYXRhXzIwMjEuY3N2IikKc3RyKENyaW1lX2RhdGFfMjAyMSkKYGBgCgpgYGB7cn0KQ3JpbWVfZGF0YV8yMDIxJFN0YXRlIDwtIGFzLmZhY3RvcihDcmltZV9kYXRhXzIwMjEkU3RhdGUpCkNyaW1lX2RhdGFfMjAyMSRQcm9oX0lSXzIwMjEgPC0gYXMubnVtZXJpYyhzY2FsZShDcmltZV9kYXRhXzIwMjEkUHJvaF9JUl8yMDIxKSkKQ3JpbWVfZGF0YV8yMDIxJEV4Y2llX0lSXzIwMjEgPC0gYXMubnVtZXJpYyhzY2FsZShDcmltZV9kYXRhXzIwMjEkRXhjaWVfSVJfMjAyMSkpCkNyaW1lX2RhdGFfMjAyMSRORFBTX0lSXzIwMjEgPC0gYXMubnVtZXJpYyhzY2FsZShDcmltZV9kYXRhXzIwMjEkTkRQU19JUl8yMDIxKSkKQ3JpbWVfZGF0YV8yMDIxJFBvc3NfY29uc3VwXzIwMjEgPC0gYXMubnVtZXJpYyhzY2FsZShDcmltZV9kYXRhXzIwMjEkUG9zc19jb25zdXBfMjAyMSkpCkNyaW1lX2RhdGFfMjAyMSRsaXFfbmFyX3RvdF8yMDIxIDwtIGFzLm51bWVyaWMoc2NhbGUoQ3JpbWVfZGF0YV8yMDIxJGxpcV9uYXJfdG90XzIwMjEpKQpDcmltZV9kYXRhXzIwMjEkcG9zc190cmFmZl8yMDIxIDwtIGFzLm51bWVyaWMoc2NhbGUoQ3JpbWVfZGF0YV8yMDIxJHBvc3NfdHJhZmZfMjAyMSkpCkNyaW1lX2RhdGFfMjAyMSRDT1RQQV9JUl8yMDIxIDwtIGFzLm51bWVyaWMoc2NhbGUoQ3JpbWVfZGF0YV8yMDIxJENPVFBBX0lSXzIwMjEpKQoKYGBgCgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCmhlYWQoQ3JpbWVfZGF0YV8yMDIxKQpsaWJyYXJ5KGNsdXN0ZXIpCmxpYnJhcnkoZmFjdG9leHRyYSkKZ293ZXJEaXN0IDwtIGRhaXN5KENyaW1lX2RhdGFfMjAyMVssLTFdLCJnb3dlciIpCmdvd2VyTWF0IDwtIGFzLm1hdHJpeChnb3dlckRpc3QpCnNpbF93aWR0aCA8LSBjKE5BKQpmb3IoaSBpbiAyOjgpewpwYW1fZml0IDwtIHBhbShnb3dlck1hdCwgZGlzcyA9IFRSVUUsIGsgPSBpKQpzaWxfd2lkdGhbaV0gPC0gcGFtX2ZpdCRzaWxpbmZvJGF2Zy53aWR0aAogfQpwbG90KDE6OCwgc2lsX3dpZHRoLAogIHhsYWIgPSAiTnVtYmVyIG9mIGNsdXN0ZXJzIiwKICB5bGFiID0gIlNpbGhvdWV0dGUgV2lkdGgiKQogbGluZXMoMTo4LCBzaWxfd2lkdGgpCmBgYApgYGB7cn0KcGFtX2ZpdCA8LSBwYW0oZ293ZXJNYXQsIGRpc3MgPSBUUlVFLDMpIApzdW1tYXJ5KHBhbV9maXQpCmBgYApgYGB7cn0KcGxvdChwYW1fZml0KQpgYGAKYGBge3J9CmxpYnJhcnkoUnRzbmUpCnRzbmVfb2JqIDwtIFJ0c25lKGdvd2VyTWF0LCBpc19kaXN0YW5jZSA9IFRSVUUscGVycGxleGl0eSA9IDEpCiB0c25lX2RhdGEgPC0gdHNuZV9vYmokWSAlPiUKICAgICAgZGF0YS5mcmFtZSgpICU+JQogICAgICBzZXROYW1lcyhjKCJYIiwgIlkiKSkgJT4lCiAgICAgIG11dGF0ZShjbHVzdGVyID0gZmFjdG9yKHBhbV9maXQkY2x1c3RlcmluZykpCiAgZ2dwbG90KGFlcyh4ID0gWCwgeSA9IFkpLCBkYXRhID0gdHNuZV9kYXRhKSArCiAgICAgIGdlb21fcG9pbnQoYWVzKGNvbG9yID0gY2x1c3RlcikpCmBgYApgYGB7cn0KQ3JpbWVfZGF0YV8yMDIxJGNsdXN0ZXIgPC0gcGFtX2ZpdCRjbHVzdGVyaW5nCiAKdGFiMSA8LSBDcmltZV9kYXRhXzIwMjEgJT4lCiAgZ3JvdXBfYnkoY2x1c3RlciklPiUKICBzdW1tYXJpc2UoYWNyb3NzKGMoMjo4KSxtZWFuKSkKdGFiMiA8LSBDcmltZV9kYXRhXzIwMjEgJT4lCiAgZ3JvdXBfYnkoY2x1c3RlciklPiUKICBzdW1tYXJpc2UoIk51bU9mU3RhdGVzIj0gbigpLAogICAgICAgICAgICAiU3RhdGVzIj0gcGFzdGUwKFN0YXRlLGNvbGxhcHNlID0gIiwgIikpCgp0YWJsZVN1bW1hcnkgPC0gbGVmdF9qb2luKHRhYjEsdGFiMikKdGFibGVTdW1tYXJ5JG1lZGlvaWRzIDwtIENyaW1lX2RhdGFfMjAyMSRTdGF0ZVtwYW1fZml0JGlkLm1lZF0KdGFibGVTdW1tYXJ5IDwtIHQodGFibGVTdW1tYXJ5KQpsaWJyYXJ5KGthYmxlRXh0cmEpCmthYmxlKHRhYmxlU3VtbWFyeSxkaWdpdHMgPSAyKQpgYGAKCg==