# K Means

# Insurance Dataset

# Assignment 21

# Load the insurance data set.
# NOTE(review): this path is machine-specific; adjust it before running the
# script on another machine.
mydata <- read.csv(
  "C:\\Users\\RISHI RAHUL\\Desktop\\DS\\6 Kmeans\\Assignment\\Insurance Dataset.csv"
)

# Normalizing continuous columns to bring them under same scale:
# scale() centres every column to mean 0 and rescales it to unit standard
# deviation, so variables measured in large units do not dominate the
# Euclidean distances k-means works with.
normalized_data <- scale(mydata)

# View() opens a data viewer; only call it when someone is there to look.
if (interactive()) {
  View(normalized_data)
}

# k-means starts from randomly chosen centres. Fix the seed so the run is
# reproducible and keep the best of several random starts instead of relying
# on a single (possibly poor) initialisation.
set.seed(123)
k_3 <- kmeans(normalized_data, centers = 3, nstart = 25)
str(k_3)

## List of 9
##  $ cluster     : int [1:100] 3 3 3 3 3 3 3 3 3 3 ...
##  $ centers     : num [1:3, 1:5] 0.345 1.313 -0.805 -0.045 1.14 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:3] "1" "2" "3"
##   .. ..$ : chr [1:5] "Premiums.Paid" "Age" "Days.to.Renew" "Claims.made" ...
##  $ totss       : num 495
##  $ withinss    : num [1:3] 55.5 94.5 98.3
##  $ tot.withinss: num 248
##  $ betweenss   : num 247
##  $ size        : int [1:3] 35 19 46
##  $ iter        : int 3
##  $ ifault      : int 0
##  - attr(*, "class")= chr "kmeans"

# Elbow (scree) plot on the normalized data: total within-cluster sum of
# squares for k = 2, ..., 15.
# The seed makes the curve reproducible; nstart keeps the best of several
# random initialisations for each k so the curve is not distorted by an
# unlucky single start.
set.seed(123)
k_values <- 2:15
twss <- vapply(
  k_values,
  function(k) kmeans(normalized_data, centers = k, nstart = 25)$tot.withinss,
  numeric(1)
)

# Look for an "elbow" in the scree plot
plot(
  k_values, twss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)
title(sub = "K-Means Clustering Scree-Plot")

# Total within-cluster sum of squares for k = 2, ..., 15, stored in `twss`.
# The fit uses the normalized data, the same input the final model is fitted
# on, so that the k read off this curve applies to that model. On the
# unscaled data the largest-valued columns dominate the distances.
# The seed and nstart make the values reproducible and robust to a poor
# random initialisation.
set.seed(123)
twss <- vapply(
  2:15,
  function(k) kmeans(normalized_data, centers = k, nstart = 25)$tot.withinss,
  numeric(1)
)
# NOTE: the figures recorded below came from an earlier run on the unscaled
# `mydata` with a single random start; values printed here will differ.
twss

##  [1] 65208208953 35652557781 25685879799 21404183236 18217731611
##  [6] 12140942531 16907166540  8853093239  9017669608  6671950052
## [11]  7652579894  6824957983  5715362078  4837005861

# Overplotted points and lines: total within-cluster sum of squares against k
plot(x = 2:15, y = twss, type = "o")

# Choosing the best cluster as 5
# Seeded, multi-start fit so the cluster assignment is reproducible.
set.seed(123)
k_5 <- kmeans(normalized_data, centers = 5, nstart = 25)
mydata["Cluster"] <- k_5$cluster

# Profile the clusters: mean of every original variable per cluster.
# Columns are selected by name so that all clustering variables are included
# and the cluster label itself is used only for grouping, not averaged.
# NOTE: the output recorded below came from an earlier version of this call
# that dropped the first column and averaged the Cluster label.
feature_cols <- setdiff(names(mydata), "Cluster")
aggregate(
  mydata[feature_cols],
  by = list(Cluster = mydata$Cluster),
  FUN = mean
)

##   Group.1      Age Days.to.Renew Claims.made   Income Cluster
## 1       1 54.25000     306.00000   63623.465 137125.0       1
## 2       2 44.60000     224.30000   12511.596  76900.0       2
## 3       3 44.85714      69.65714    8507.865 122114.3       3
## 4       4 62.75000     148.37500   22928.827 157000.0       4
## 5       5 37.12000      60.72000    3541.482  54100.0       5