# K-means clustering of the insurance dataset: load, standardize, and plot.
# (plyr is not used by the code below; the lines are kept for reference.)
#install.packages("plyr")
#library(plyr)

# Load the raw insurance data.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
dataQ1 <- read.csv("D:\\DataScience\\Assignments\\K Means Clustaring\\Insurance Dataset.csv")
# View() opens a spreadsheet-style viewer; only do that in interactive sessions
# so the script can also be run with Rscript.
if (interactive()) {
  View(dataQ1)
}

# Data normalization: centre every column to mean 0 and scale it to sd 1 so
# that no single variable dominates the distance computation.
# scale() requires all columns of dataQ1 to be numeric.
Data <- scale(dataQ1)
if (interactive()) {
  View(Data)
}

# Scatter plot of the standardized data. For a matrix, plot() and text() both
# use the first two columns as the x/y coordinates.
plot(Data)

# Label each point once. Use the row names when the matrix has them, otherwise
# fall back to the row index (text() fails on zero-length labels).
point_labels <- rownames(Data)
if (is.null(point_labels)) {
  point_labels <- seq_len(nrow(Data))
}
# pos = 3 draws each label just above its point instead of on top of it.
text(Data, labels = point_labels, pos = 3)

# K-means clustering of the standardized data into 4 clusters.
# kmeans() starts from randomly chosen centres, so fix the RNG seed to make
# the result reproducible, and use several random starts (nstart) so the best
# of them is kept instead of a single, possibly poor, local optimum.
set.seed(123)
km <- kmeans(Data, centers = 4, nstart = 25)

# NOTE: every "##" block below is sample output pasted from an earlier,
# unseeded run; exact numbers and cluster numbering may differ from what this
# script now prints.
str(km)
## List of 9
## $ cluster : int [1:100] 4 3 3 3 3 3 3 3 3 1 ...
## $ centers : num [1:4, 1:5] -0.5 1.564 -0.423 -0.334 1.396 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:4] "1" "2" "3" "4"
## .. ..$ : chr [1:5] "Premiums.Paid" "Age" "Days.to.Renew" "Claims.made" ...
## $ totss : num 495
## $ withinss : num [1:4] 12.6 83.5 103.7 27.3
## $ tot.withinss: num 227
## $ betweenss : num 268
## $ size : int [1:4] 8 21 58 13
## $ iter : int 3
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"

# Cluster centres, expressed in standardized (z-score) units.
km$centers
## Premiums.Paid Age Days.to.Renew Claims.made Income
## 1 -0.4995412 1.3962055 1.4164319 0.3602506 -0.94790023
## 2 1.5641961 1.0790341 0.3653529 0.9911327 1.22365303
## 3 -0.4226742 -0.3542967 -0.6097625 -0.4814852 -0.31497625
## 4 -0.3335910 -1.0215503 1.2586429 0.3254114 0.01193161

# Cluster assignment of every row of Data.
km$cluster
## [1] 4 3 3 3 3 3 3 3 3 1 1 1 1 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1
## [36] 1 4 3 3 3 3 3 3 3 3 3 4 4 4 4 4 3 3 3 3 3 3 2 2 3 3 3 3 3 3 3 3 4 2 4
## [71] 4 4 3 3 3 3 3 3 2 3 2 2 4 2 2 2 2 2 2 2 2 3 2 3 2 2 2 2 2 2

# Attach the cluster label to the original (unscaled) records; the label
# column is named "km.cluster" by data.frame().
finalmodel <- data.frame(km$cluster, dataQ1)
# Open the viewer only in interactive sessions so the script also runs
# under Rscript.
if (interactive()) {
  View(finalmodel)
}

# Per-cluster means of the first five columns in their original units, which
# are easier to interpret than the standardized centres above.
x <- aggregate(dataQ1[, 1:5], by = list(km$cluster), FUN = mean)
x
## Group.1 Premiums.Paid Age Days.to.Renew Claims.made Income
## 1 1 9150.000 65.50000 245.1250 17512.952 61000.0
## 2 2 23164.286 61.09524 152.5714 26153.455 155500.0
## 3 3 9671.983 41.18966 66.7069 5984.617 88543.1
## 4 4 10276.923 31.92308 231.2308 17035.798 102769.2