# Show the working directory; the relative CSV path below is resolved against it
getwd()
## [1] "/Users/sanjayfuloria"

# Load the employee data set from CSV
Employee_Data <- read.csv(file = "HR_Analytics_Cluster_Analysis.csv")

# Drop the ID column and the variables X8, X11, X12, X13, X14 so that only
# the remaining columns take part in the clustering
Employee_Data_df <- subset(
  Employee_Data,
  select = -c(ID, X8, X11, X12, X13, X14)
)

# Prepare Data: listwise deletion of rows with missing values, then
# standardize every variable (scale() returns a numeric matrix)
mydata <- scale(na.omit(Employee_Data_df))
# Determine number of clusters (elbow method): compute the total
# within-cluster sum of squares for k = 1, 2, ..., 15 and plot it against k.

# kmeans() draws its starting centres at random; fixing the seed makes the
# curve reproducible from run to run.
set.seed(123)

# The default (Hartigan-Wong) algorithm needs fewer centres than distinct
# rows, so cap k for small data sets instead of failing part-way through.
max_k <- min(15L, nrow(unique(mydata)) - 1L)
if (max_k < 1L) {
  stop("Need at least two distinct observations to cluster.", call. = FALSE)
}

wss <- vapply(
  seq_len(max_k),
  function(k) {
    if (k == 1L) {
      # One cluster: the within-group SS is the total sum of squares
      (nrow(mydata) - 1) * sum(apply(mydata, 2, var))
    } else {
      # nstart > 1 keeps the best of several random starts, which avoids
      # spurious bumps in the curve caused by poor local optima
      kmeans(mydata, centers = k, nstart = 25)$tot.withinss
    }
  },
  numeric(1)
)

plot(seq_len(max_k), wss, type = "b", xlab = "Number of Clusters",
     ylab = "Within groups sum of squares")
# K-Means Cluster Analysis
# kmeans() starts from randomly chosen centres, so without a seed both the
# solution and the cluster numbering can change between runs. Fix the seed
# and keep the best of several random starts so the fit is reproducible.
# NOTE: the table recorded below came from an unseeded run, so the cluster
# numbers printed by a fresh run may be permuted relative to it.
set.seed(123)
fit <- kmeans(mydata, centers = 5, nstart = 25) # 5 cluster solution
# get cluster means (on the standardized scale, one row per cluster)
aggregate(mydata, by = list(fit$cluster), FUN = mean)
## Group.1 X1 X2 X3 X4 X5 X6
## 1 1 -0.1438602 -1.1407937 0.1906476 -0.70531230 -1.1638264 -0.52755285
## 2 2 1.2152404 -0.5776453 1.0861867 0.82964053 0.6442531 0.94483851
## 3 3 -0.2488300 1.3454735 -0.6742933 0.78116270 0.8560193 0.53482538
## 4 4 0.7064294 -0.4683610 0.7659563 -0.75304020 0.2875179 -0.69144018
## 5 5 -1.1034138 0.1716466 -0.8805779 -0.09001672 -0.8353174 -0.08931144
## X7 X9 X10
## 1 -0.7649731 -1.0679993 -0.5115572
## 2 -0.4190740 1.2126242 1.4131135
## 3 0.6749189 0.3226248 0.3048517
## 4 -0.7815847 0.5050747 0.2770123
## 5 0.7752765 -0.9653071 -1.1034639
# append cluster assignment
# (the column is named explicitly; "fit.cluster" is the name data.frame()
# would derive from the expression fit$cluster anyway)
mydata <- data.frame(mydata, fit.cluster = fit$cluster)

# The k-means solution in `fit` is reused as-is. Re-running kmeans() on
# `mydata` at this point would treat the appended cluster label as an extra
# (unscaled) feature and would produce assignments unrelated to the cluster
# means reported above and to the labels just appended.

# Cluster Plot against 1st 2 principal components
# vary parameters for most readable graph
library(cluster)
# Plot only the standardized input variables; the label column must not
# contribute to the principal components.
feature_cols <- setdiff(names(mydata), "fit.cluster")
clusplot(mydata[, feature_cols, drop = FALSE], fit$cluster,
         color = TRUE, shade = TRUE, labels = 2, lines = 0)
##Explanation of Clusters
Cluster 1: They are average in everything. I call them ‘Average Joes’. Cluster 2: They finish work on time, have good interpersonal skills, and are multitaskers. Their salaries are high. I call them ‘Superstars’. Cluster 3: They attend all meetings and work well with cross-functional teams, but their salaries are lower than those in Cluster 2. I call them ‘Hard Workers’. Cluster 4: They don’t finish work on time, go by the book, and don’t work well with cross-functional teams; their salaries are the lowest. I call them ‘Laggards’. Cluster 5: They don’t finish work on time and are low on independence; their salaries are the same as those in Cluster 3.