# Load the insurance data set and standardise every column with scale()
# (centre to mean 0, scale to sd 1) before clustering.
dataQ1 <- read.csv("C:\\Users\\Harisha\\Desktop\\Datascience Assignments\\K mean clustering\\Insurance Dataset.csv")
Data <- scale(dataQ1)
View(Data)

# Scatter plot of the scaled data; plot() on a matrix uses its first two
# columns as x and y. Draw it once, then label each point.
plot(Data)
# A matrix produced by scale() from a freshly read data frame normally has
# no row names, so fall back to row indices to avoid empty labels.
point_labels <- rownames(Data)
if (is.null(point_labels)) {
  point_labels <- seq_len(nrow(Data))
}
# pos = 3 places each label just above its point so the marker stays visible.
text(Data, labels = point_labels, pos = 3, cex = 0.7)

# kmeans() chooses its starting centres at random; fix the seed so the
# cluster numbering, sizes and centres are reproducible between runs.
set.seed(123)
# Partition the scaled data into 4 clusters. nstart = 25 runs 25 random
# starts and keeps the solution with the lowest total within-cluster
# sum of squares, which guards against a poor local optimum.
km <- kmeans(Data, centers = 4, nstart = 25)
str(km)
## NOTE: the output below was recorded from an earlier, unseeded run;
## cluster numbering and exact values may differ from a fresh run.
## List of 9
## $ cluster : int [1:100] 2 1 1 1 1 1 1 1 1 2 ...
## $ centers : num [1:4, 1:5] -0.781 -0.512 1.286 0.765 -0.598 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:4] "1" "2" "3" "4"
## .. ..$ : chr [1:5] "Premiums.Paid" "Age" "Days.to.Renew" "Claims.made" ...
## $ totss : num 495
## $ withinss : num [1:4] 42 45.9 34.2 75.5
## $ tot.withinss: num 198
## $ betweenss : num 297
## $ size : int [1:4] 36 20 9 35
## $ iter : int 4
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
# Cluster centres, expressed in the standardised units of the scaled data.
km[["centers"]]
## Premiums.Paid Age Days.to.Renew Claims.made Income
## 1 -0.7806637 -0.59797374 -0.6663706 -0.5973153393 -0.7659800
## 2 -0.5124264 -0.04392395 1.1651707 -0.0003423441 -0.5980389
## 3 1.2863898 0.74414687 1.6926142 2.5400484134 1.0244982
## 4 0.7649975 0.44880605 -0.4156457 -0.0385781892 0.8661592
# Cluster label assigned to each observation, in row order.
km[["cluster"]]
## [1] 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2
## [36] 3 2 2 2 1 4 1 1 1 1 4 1 2 2 2 2 1 1 1 1 4 4 4 4 1 4 4 1 4 4 4 4 4 3 2
## [71] 2 3 4 4 4 4 4 4 4 4 4 4 4 4 3 3 3 3 4 4 4 4 4 4 4 4 4 4 3 3
# Put each observation's cluster label in front of the original
# (unscaled) columns; the label column is named km.cluster.
finalmodel <- data.frame(km.cluster = km[["cluster"]], dataQ1)
View(finalmodel)
# Mean of the first five original columns within each cluster; the
# unnamed grouping list yields the Group.1 column in the result.
x <- aggregate(
  x = dataQ1[, seq_len(5)],
  by = list(km[["cluster"]]),
  FUN = mean
)
x
## Group.1 Premiums.Paid Age Days.to.Renew Claims.made Income
## 1 1 7240.972 37.80556 61.72222 4398.218 68916.67
## 2 2 9062.500 45.50000 223.00000 12574.305 76225.00
## 3 3 21277.778 56.44444 269.44444 47367.260 146833.33
## 4 4 17737.143 52.34286 83.80000 12050.630 139942.86