#install.packages("plyr")
#library(plyr)
 
# Load the insurance data set that will be clustered.
dataQ1 <- read.csv("D:\\DataScience\\Assignments\\K Means Clustaring\\Insurance Dataset.csv")

# View() opens a spreadsheet-style viewer; only call it when a user is
# actually at the console so batch runs (Rscript, R CMD BATCH) do not fail.
if (interactive()) {
  View(dataQ1)
}

# Data normalization: scale() centres every column and divides it by its
# standard deviation, returning a numeric matrix. k-means is distance based,
# so the columns need to be on a comparable scale.
# NOTE(review): scale() requires every column of dataQ1 to be numeric.
Data <- scale(dataQ1)

if (interactive()) {
  View(Data)
}

# Scatter plot of the scaled data (for a matrix, the first two columns are
# used as the x and y coordinates).
plot(Data)

# Label each plotted point. The previous call passed the (NULL) result of a
# second plot(Data) as the y argument, which redrew the plot needlessly.
# Fall back to row indices when the scaled matrix carries no row names.
point_labels <- rownames(Data)
if (is.null(point_labels)) {
  point_labels <- seq_len(nrow(Data))
}
text(Data, labels = point_labels, pos = 3)

# k-means clustering of the scaled data into 4 clusters.
# kmeans() chooses its initial centres at random, so the seed is fixed to make
# the cluster assignments reproducible, and nstart requests several random
# starts so the reported solution is the best of them rather than whatever a
# single initialisation happened to converge to.
set.seed(123)
km <- kmeans(Data, centers = 4, nstart = 25)

# Inspect the structure of the fitted object.
# NOTE: the "##" lines in this section are console output pasted from an
# earlier run; cluster numbering, sizes and sums of squares from a fresh run
# may differ from what is shown.
str(km)
## List of 9
##  $ cluster     : int [1:100] 4 3 3 3 3 3 3 3 3 1 ...
##  $ centers     : num [1:4, 1:5] -0.5 1.564 -0.423 -0.334 1.396 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:4] "1" "2" "3" "4"
##   .. ..$ : chr [1:5] "Premiums.Paid" "Age" "Days.to.Renew" "Claims.made" ...
##  $ totss       : num 495
##  $ withinss    : num [1:4] 12.6 83.5 103.7 27.3
##  $ tot.withinss: num 227
##  $ betweenss   : num 268
##  $ size        : int [1:4] 8 21 58 13
##  $ iter        : int 3
##  $ ifault      : int 0
##  - attr(*, "class")= chr "kmeans"

# Cluster centres, expressed in the scaled (standardised) units of Data.
# print() is explicit so the values also appear when the script is source()d.
print(km$centers)
##   Premiums.Paid        Age Days.to.Renew Claims.made      Income
## 1    -0.4995412  1.3962055     1.4164319   0.3602506 -0.94790023
## 2     1.5641961  1.0790341     0.3653529   0.9911327  1.22365303
## 3    -0.4226742 -0.3542967    -0.6097625  -0.4814852 -0.31497625
## 4    -0.3335910 -1.0215503     1.2586429   0.3254114  0.01193161

# Cluster assignment (1-4) for every row of Data, in row order.
print(km$cluster)
##   [1] 4 3 3 3 3 3 3 3 3 1 1 1 1 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1
##  [36] 1 4 3 3 3 3 3 3 3 3 3 4 4 4 4 4 3 3 3 3 3 3 2 2 3 3 3 3 3 3 3 3 4 2 4
##  [71] 4 4 3 3 3 3 3 3 2 3 2 2 4 2 2 2 2 2 2 2 2 3 2 3 2 2 2 2 2 2
# Attach each row's cluster assignment to the original (unscaled) data.
finalmodel <- data.frame(km$cluster, dataQ1)

# View() needs an interactive session; skip it in batch runs.
if (interactive()) {
  View(finalmodel)
}

# Per-cluster mean of the first five columns, computed on the original scale
# so the cluster profiles can be read in the data's own units.
x <- aggregate(dataQ1[, 1:5], by = list(km$cluster), FUN = mean)

# print() is explicit so the table also appears when the script is source()d.
# The "##" lines below are output pasted from an earlier run.
print(x)
##   Group.1 Premiums.Paid      Age Days.to.Renew Claims.made   Income
## 1       1      9150.000 65.50000      245.1250   17512.952  61000.0
## 2       2     23164.286 61.09524      152.5714   26153.455 155500.0
## 3       3      9671.983 41.18966       66.7069    5984.617  88543.1
## 4       4     10276.923 31.92308      231.2308   17035.798 102769.2