K-Means Clustering

Analyze the information in the following ‘Insurance Policy’ dataset and group policyholders of the same type into clusters.

library(data.table)



# Load the insurance policy data. The path is passed through `file =` so that
# fread() always treats it as a file name: a string containing spaces that is
# handed to the generic `input` argument can be interpreted as a shell command
# when no file of that name exists.
Insurance_Dataset <- fread(
  file = "C:/Users/Pawan Srivastav/Desktop/Data Science/Assignment/K-mean clustering/Insurance Dataset.csv"
)

# Summary statistics for each column of the raw data.
summary(Insurance_Dataset)

##  Premiums Paid        Age        Days to Renew    Claims made   
##  Min.   : 2800   Min.   :23.00   Min.   :  1.0   Min.   : 1978  
##  1st Qu.: 6975   1st Qu.:34.00   1st Qu.: 56.0   1st Qu.: 5221  
##  Median :11825   Median :45.00   Median : 89.0   Median : 8386  
##  Mean   :12542   Mean   :46.11   Mean   :120.4   Mean   :12579  
##  3rd Qu.:15475   3rd Qu.:54.50   3rd Qu.:186.5   3rd Qu.:14671  
##  Max.   :29900   Max.   :82.00   Max.   :321.0   Max.   :99677  
##      Income      
##  Min.   : 28000  
##  1st Qu.: 65125  
##  Median :102250  
##  Mean   :102250  
##  3rd Qu.:139375  
##  Max.   :176500

# Standardise every column (scale() defaults: subtract the column mean, divide
# by the column standard deviation) so the variables are on a comparable scale
# before distances are computed.
Norm_Insrance <- scale(Insurance_Dataset)


library(cluster)

# kmeans() chooses its starting centres at random. Fix the seed and keep the
# best of several random starts so the three-cluster partition is reproducible
# from run to run.
set.seed(123)
insurance_kmeans <- kmeans(Norm_Insrance, centers = 3, nstart = 25)


# Cluster the scaled data into three groups with clara().
insurance_clara <- clara(Norm_Insrance, k = 3)

# Append the cluster assignment to the raw data. The component is spelled out
# as `clustering` (its actual name) instead of relying on `$cluster` being
# partially matched. The unnamed vector is bound as column V2, which the
# aggregate() call below groups by.
Insurance_Dataset_clara <- cbind(Insurance_Dataset, insurance_clara$clustering)

clusplot(insurance_clara)

# Mean of every column within each cluster.
aggregate(
  Insurance_Dataset_clara,
  by = list(Insurance_Dataset_clara$V2),
  FUN = mean
)

##   Group.1 Premiums Paid      Age Days to Renew Claims made   Income V2
## 1       1       5656.25 41.15625       100.875    4477.407  51250.0  1
## 2       2      12836.98 43.18750       108.625   10276.400 115093.8  2
## 3       3      22852.50 61.05000       179.900   31067.755 153025.0  3

library(animation)

# Animated k-means with three centres on the scaled data; the value returned by
# kmeans.ani() is kept in km1.
# NOTE(review): kmeans.ani() appears to be intended for two-column input —
# confirm that passing all scaled columns produces the intended animation.
km1 <- kmeans.ani(x = Norm_Insrance, centers = 3)

# Cluster the scaled data into three groups with pam().
insurance_pam <- pam(Norm_Insrance, k = 3)

# Append the cluster assignment to the raw data. The component is spelled out
# as `clustering` (its actual name) instead of relying on `$cluster` being
# partially matched. The unnamed vector is bound as column V2, which the
# aggregate() call below groups by.
Insurance_Dataset_pam <- cbind(Insurance_Dataset, insurance_pam$clustering)

clusplot(insurance_pam)

# Mean of every column within each cluster.
aggregate(
  Insurance_Dataset_pam,
  by = list(Insurance_Dataset_pam$V2),
  FUN = mean
)

##   Group.1 Premiums Paid      Age Days to Renew Claims made   Income V2
## 1       1       5656.25 41.15625       100.875    4477.407  51250.0  1
## 2       2      12836.98 43.18750       108.625   10276.400 115093.8  2
## 3       3      22852.50 61.05000       179.900   31067.755 153025.0  3

# Hierarchical clustering
# Pairwise Euclidean distances between the rows of the scaled data.
dist_insurance <- dist(Norm_Insrance, method = "euclidean")

# Agglomerate using complete linkage.
hclust_ins <- hclust(dist_insurance, method = "complete")

# Draw the dendrogram, then outline the three-cluster cut on it in red.
# rect.hclust() only adds rectangles to the current plot, so it must follow
# plot() directly. Previously a plot() call was passed as a positional argument
# to rect.hclust(), which redrew the tree as a side effect of argument
# evaluation.
plot(hclust_ins, hang = -1)
rect.hclust(hclust_ins, k = 3, border = "red")

# Cluster membership (1..3) for every observation.
group_ins <- cutree(hclust_ins, k = 3)

# Attach the membership to the raw data as column `group_ins`.
Insurance_Dataset_Final <- cbind(Insurance_Dataset, group_ins)

# Mean of every column within each cluster.
aggregate(
  Insurance_Dataset_Final,
  by = list(Insurance_Dataset_Final$group_ins),
  FUN = mean
)

##   Group.1 Premiums Paid      Age Days to Renew Claims made    Income
## 1       1      9631.169 41.55844      108.0649    7859.606  88233.77
## 2       2     22230.682 61.45455      154.4545   25137.860 148818.18
## 3       3     23550.000 59.00000      321.0000   99676.744 157000.00
##   group_ins
## 1         1
## 2         2
## 3         3

K-Means Clustering

Pawan Srivastav

5 July 2018

Analyze the information in the following ‘Insurance Policy’ dataset and group policyholders of the same type into clusters.