# Analyze the information given in the following ‘Insurance Policy dataset’ to create clusters of persons who fall into the same type.
# Load the insurance policy data and print per-column summary statistics.
library(data.table)
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists.
# The path is passed through `file =` so fread() always treats it as a file
# name. Supplied positionally (as `input`), a string that contains spaces and
# does not resolve to an existing file is interpreted as a shell command.
Insurance_Dataset <- fread(
  file = "C:/Users/Pawan Srivastav/Desktop/Data Science/Assignment/K-mean clustering/Insurance Dataset.csv"
)
summary(Insurance_Dataset)
## Premiums Paid Age Days to Renew Claims made
## Min. : 2800 Min. :23.00 Min. : 1.0 Min. : 1978
## 1st Qu.: 6975 1st Qu.:34.00 1st Qu.: 56.0 1st Qu.: 5221
## Median :11825 Median :45.00 Median : 89.0 Median : 8386
## Mean :12542 Mean :46.11 Mean :120.4 Mean :12579
## 3rd Qu.:15475 3rd Qu.:54.50 3rd Qu.:186.5 3rd Qu.:14671
## Max. :29900 Max. :82.00 Max. :321.0 Max. :99677
## Income
## Min. : 28000
## 1st Qu.: 65125
## Median :102250
## Mean :102250
## 3rd Qu.:139375
## Max. :176500
# Standardise every column (scale() defaults: centre on the column mean and
# divide by the column standard deviation). The summary above shows the
# variables on very different ranges, so raw values would let the largest
# ones dominate the distance computations used by the clustering below.
Norm_Insrance <- scale(Insurance_Dataset)
library(cluster)
# kmeans() chooses its starting centres at random; fixing the seed makes the
# resulting 3-cluster partition reproducible between runs.
set.seed(123)
insurance_kmeans <- kmeans(Norm_Insrance, centers = 3)
# Partition the standardised data into 3 clusters with CLARA, attach the
# cluster assignment to the original (unscaled) data and profile the clusters.
insurance_clara <- clara(Norm_Insrance, k = 3)
# The fitted object stores the assignments in `clustering`; `$cluster` only
# resolved to it through `$` partial matching. The new column is named V2
# explicitly (the name data.table gives an unnamed second argument) so the
# aggregate() call below does not depend on auto-generated names.
Insurance_Dataset_clara <- cbind(
  Insurance_Dataset,
  V2 = insurance_clara$clustering
)
clusplot(insurance_clara)

# Mean of every column within each CLARA cluster.
aggregate(
  Insurance_Dataset_clara,
  by = list(Insurance_Dataset_clara$V2),
  FUN = mean
)
## Group.1 Premiums Paid Age Days to Renew Claims made Income V2
## 1 1 5656.25 41.15625 100.875 4477.407 51250.0 1
## 2 2 12836.98 43.18750 108.625 10276.400 115093.8 2
## 3 3 22852.50 61.05000 179.900 31067.755 153025.0 3
# Step-by-step animated k-means on the standardised data, again with three
# centres; the value returned by kmeans.ani() is kept in km1.
library(animation)
km1 <- kmeans.ani(x = Norm_Insrance, centers = 3)








# Partition the standardised data into 3 clusters with PAM (k-medoids), attach
# the cluster assignment to the original (unscaled) data and profile the
# clusters.
insurance_pam <- pam(Norm_Insrance, k = 3)
# The fitted object stores the assignments in `clustering`; `$cluster` only
# resolved to it through `$` partial matching. The new column is named V2
# explicitly (the name data.table gives an unnamed second argument) so the
# aggregate() call below does not depend on auto-generated names.
Insurance_Dataset_pam <- cbind(
  Insurance_Dataset,
  V2 = insurance_pam$clustering
)
clusplot(insurance_pam)

# Mean of every column within each PAM cluster.
aggregate(
  Insurance_Dataset_pam,
  by = list(Insurance_Dataset_pam$V2),
  FUN = mean
)
## Group.1 Premiums Paid Age Days to Renew Claims made Income V2
## 1 1 5656.25 41.15625 100.875 4477.407 51250.0 1
## 2 2 12836.98 43.18750 108.625 10276.400 115093.8 2
## 3 3 22852.50 61.05000 179.900 31067.755 153025.0 3
# Hierarchical clustering
# Agglomerative clustering on Euclidean distances between the standardised
# rows, merging clusters with complete linkage.
dist_insurance <- dist(Norm_Insrance, method = "euclidean")
hclust_ins <- hclust(dist_insurance, method = "complete")
plot(hclust_ins, hang = -1)
# Outline the 3-cluster cut on the dendrogram drawn above. rect.hclust() only
# adds rectangles to the current plot, so no plot() call belongs in its
# arguments; a stray positional argument would be taken as `which`.
rect.hclust(hclust_ins, k = 3, border = "red")

# Cut the tree into 3 groups, attach the group label to the original
# (unscaled) data and report the per-group mean of every column.
group_ins <- cutree(hclust_ins, k = 3)
Insurance_Dataset_Final <- cbind(Insurance_Dataset, group_ins)
aggregate(
  Insurance_Dataset_Final,
  by = list(Insurance_Dataset_Final$group_ins),
  FUN = mean
)
## Group.1 Premiums Paid Age Days to Renew Claims made Income
## 1 1 9631.169 41.55844 108.0649 7859.606 88233.77
## 2 2 22230.682 61.45455 154.4545 25137.860 148818.18
## 3 3 23550.000 59.00000 321.0000 99676.744 157000.00
## group_ins
## 1 1
## 2 2
## 3 3