Note:
1) Consider the number of clusters as 2.
2) Form the clusters based on the B.Tech percentage (the `B.Tech.perc` column).
3) Plot the clusters using the plot function, replacing the points in the graph with each observation's placement status.
# Load the data set into the data frame `m`.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running the script on another computer.
m <- read.csv(file = "C:/Users/pradeep/OneDrive/datasets/bbn.csv")

# Preview the first rows to check the column names and types.
head(m)
## Gender SSC.Percentage inter.Diploma.perc B.Tech.perc Backlogs
## 1 M 87.30 65.3 40.00 18
## 2 F 89.00 92.4 71.45 0
## 3 F 67.00 68.0 45.26 13
## 4 M 71.00 70.4 36.47 17
## 5 M 67.00 65.5 42.52 17
## 6 M 81.26 68.0 62.20 6
## registered_for_.training Placement.status
## 1 No Not placed
## 2 yes placed
## 3 yes Not placed
## 4 yes Not placed
## 5 yes Not placed
## 6 yes Not placed
# Partition the observations into 2 clusters using only the B.Tech percentage.
# kmeans() picks its starting centres at random, so without a fixed seed the
# cluster numbering (1 vs 2), and occasionally the partition itself, can change
# from run to run. Fix the seed for reproducibility and try several random
# starts so that the solution with the lowest total within-cluster sum of
# squares is kept.
# NOTE: the sample output recorded below came from an unseeded run, so the
# cluster labels shown there may be swapped relative to a fresh run.
set.seed(42)
clust <- kmeans(m$B.Tech.perc, centers = 2, nstart = 25)
print(clust)
## K-means clustering with 2 clusters of sizes 67, 50
##
## Cluster means:
## [,1]
## 1 71.57092
## 2 51.38185
##
## Clustering vector:
## [1] 2 1 2 2 2 1 2 2 2 2 1 2 2 1 2 1 1 1 1 1 1 1 1 1 2 2 2 2 1 1 1 2 1 2 1
## [36] 1 1 1 2 1 1 2 2 1 1 1 2 1 2 1 1 2 2 1 2 1 2 2 1 1 2 1 2 2 1 1 1 1 2 2
## [71] 1 2 1 1 1 2 2 1 1 2 1 1 2 1 1 1 2 2 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 2 1
## [106] 2 2 2 2 2 2 2 2 1 1 1 1
##
## Within cluster sum of squares by cluster:
## [1] 2189.909 3023.810
## (between_SS / total_SS = 69.1 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"
# Set up an empty plotting region: type = "n" draws the axes but no points
# (the default, type = "p", would draw point symbols). Because only one
# vector is supplied, the values go on the y-axis against their row index.
plot(m$B.Tech.perc, type = "n")

# Write each observation's placement status where its point would have been:
#   x, y   - row index and B.Tech percentage (same coordinates as the plot)
#   labels - the text drawn at each position
#   col    - cluster number (1 or 2), used as the colour of the label
text(
  x = seq_along(m$B.Tech.perc),
  y = m$B.Tech.perc,
  labels = m$Placement.status,
  col = clust$cluster
)