# Assignment 21
# Load the insurance dataset. The path is specific to the author's machine;
# adjust it before running elsewhere.
mydata <- read.csv("C:\\Users\\RISHI RAHUL\\Desktop\\DS\\6 Kmeans\\Assignment\\Insurance Dataset.csv")

# Normalize the columns (scale() centers each column and divides by its
# standard deviation) to bring them onto the same scale before clustering.
normalized_data <- scale(mydata)

# View() opens a GUI data viewer, so only call it in an interactive session;
# this keeps the script runnable under Rscript / R CMD BATCH.
if (interactive()) {
  View(normalized_data)
}
#wss = NULL
# Fit an initial 3-cluster k-means model on the standardized data.
# kmeans() chooses its starting centers at random, so the seed is fixed for
# reproducibility and several random starts (nstart) are used so that the
# reported solution is the best of many rather than one arbitrary start.
set.seed(123)
k_3 <- kmeans(normalized_data, centers = 3, nstart = 25)
str(k_3)
# Output below was recorded from an earlier unseeded, single-start run;
# values may differ when the script is re-run.
## List of 9
## $ cluster : int [1:100] 3 3 3 3 3 3 3 3 3 3 ...
## $ centers : num [1:3, 1:5] 0.345 1.313 -0.805 -0.045 1.14 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:3] "1" "2" "3"
## .. ..$ : chr [1:5] "Premiums.Paid" "Age" "Days.to.Renew" "Claims.made" ...
## $ totss : num 495
## $ withinss : num [1:3] 55.5 94.5 98.3
## $ tot.withinss: num 248
## $ betweenss : num 247
## $ size : int [1:3] 35 19 46
## $ iter : int 3
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
# Elbow (scree) plot on the standardized data: total within-cluster sum of
# squares for k = 2..15.
# The seed plus multiple random starts per k keep the curve reproducible and
# stop a single unlucky start from putting a spurious bump in the elbow.
set.seed(123)
k_values <- 2:15
twss <- vapply(
  k_values,
  function(k) kmeans(normalized_data, centers = k, nstart = 25)$tot.withinss,
  numeric(1)
)

# Look for an "elbow" in the scree plot
plot(
  k_values, twss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)
title(sub = "K-Means Clustering Scree-Plot")

# Compute the total within-cluster sum of squares for k = 2..15, print it and
# plot it.
# The clustering is run on normalized_data (not the raw mydata): on unscaled
# data the largest-magnitude columns dominate the distance, and the final
# model is fitted on the standardized data, so k must be selected on the same
# scale. A fixed seed and multiple starts keep the curve stable.
set.seed(123)
k_values <- 2:15
twss <- vapply(
  k_values,
  function(k) kmeans(normalized_data, centers = k, nstart = 25)$tot.withinss,
  numeric(1)
)
twss
# NOTE: the figures below were recorded from the original run on the unscaled
# data with a single random start (hence the ~1e10 magnitudes and the
# non-monotone step between k = 7 and k = 8); re-run to refresh them.
## [1] 65208208953 35652557781 25685879799 21404183236 18217731611
## [6] 12140942531 16907166540 8853093239 9017669608 6671950052
## [11] 7652579894 6824957983 5715362078 4837005861
plot(k_values, twss, type = "o")

# Choosing 5 as the number of clusters.
# Seeded, multi-start fit so the cluster labels are reproducible.
set.seed(123)
k_5 <- kmeans(normalized_data, centers = 5, nstart = 25)
mydata$Cluster <- k_5$cluster

# Per-cluster means of every original variable, on the original scale.
# Columns are selected by name so that all clustering features (including
# Premiums.Paid) are summarized and the Cluster label itself is not averaged.
feature_cols <- setdiff(names(mydata), "Cluster")
aggregate(mydata[feature_cols], by = list(Cluster = mydata$Cluster), FUN = mean)
# NOTE: the output below was recorded from the original call, which used
# mydata[, -c(1, 7)] and therefore omitted Premiums.Paid and included the
# mean of the Cluster column; re-run to refresh it.
## Group.1 Age Days.to.Renew Claims.made Income Cluster
## 1 1 54.25000 306.00000 63623.465 137125.0 1
## 2 2 44.60000 224.30000 12511.596 76900.0 2
## 3 3 44.85714 69.65714 8507.865 122114.3 3
## 4 4 62.75000 148.37500 22928.827 157000.0 4
## 5 5 37.12000 60.72000 3541.482 54100.0 5