Cluster Analysis

getwd()
## [1] "/Users/sanjayfuloria"

# Load the HR analytics export; the path is resolved relative to the
# working directory printed above.
Employee_Data <- read.csv("HR_Analytics_Cluster_Analysis.csv")

# Drop the ID, X8 and X11-X14 columns so only the remaining variables
# take part in the clustering.
Employee_Data_df <- subset(
  Employee_Data,
  select = -c(ID, X8, X11, X12, X13, X14)
)

# Prepare data: listwise deletion of rows with missing values, then
# standardize every column (scale() centres and scales, returning a matrix).
mydata <- scale(na.omit(Employee_Data_df))

# Determine number of clusters (elbow plot of within-groups sum of squares)
#
# k-means starts from random centres, so the RNG seed is fixed and several
# random starts are tried per k (nstart); otherwise the curve changes from
# run to run and a single poor start can put spurious bumps in it.
set.seed(123)
max_k <- 15

# For k = 1 the within-groups sum of squares is the total sum of squares.
total_ss <- (nrow(mydata) - 1) * sum(apply(mydata, 2, var))

# For k >= 2 take the total within-cluster sum of squares of the best fit
# (tot.withinss is the sum of the per-cluster withinss values).
wss <- c(
  total_ss,
  vapply(
    2:max_k,
    function(k) kmeans(mydata, centers = k, nstart = 25)$tot.withinss,
    numeric(1)
  )
)

plot(seq_len(max_k), wss, type = "b", xlab = "Number of Clusters",
     ylab = "Within groups sum of squares")

# K-Means Cluster Analysis
#
# Fix the seed and use several random starts so that the solution, and in
# particular the cluster numbering used when interpreting the clusters, is
# the same every time the script is run.
# NOTE: any printed table of cluster means captured from an earlier,
# unseeded run may not match the numbering produced here; re-run to refresh.
set.seed(123)
fit <- kmeans(mydata, centers = 5, nstart = 25) # 5 cluster solution

# get cluster means: one row per cluster, one column per variable of mydata
aggregate(mydata, by = list(fit$cluster), FUN = mean)
##   Group.1         X1         X2         X3          X4         X5          X6
## 1       1 -0.1438602 -1.1407937  0.1906476 -0.70531230 -1.1638264 -0.52755285
## 2       2  1.2152404 -0.5776453  1.0861867  0.82964053  0.6442531  0.94483851
## 3       3 -0.2488300  1.3454735 -0.6742933  0.78116270  0.8560193  0.53482538
## 4       4  0.7064294 -0.4683610  0.7659563 -0.75304020  0.2875179 -0.69144018
## 5       5 -1.1034138  0.1716466 -0.8805779 -0.09001672 -0.8353174 -0.08931144
##           X7         X9        X10
## 1 -0.7649731 -1.0679993 -0.5115572
## 2 -0.4190740  1.2126242  1.4131135
## 3  0.6749189  0.3226248  0.3048517
## 4 -0.7815847  0.5050747  0.2770123
## 5  0.7752765 -0.9653071 -1.1034639
# Keep the feature matrix separate from the labelled copy so that the
# cluster label is never treated as an input variable.
scaled_features <- mydata

# append cluster assignment (column is named fit.cluster, as before)
mydata <- data.frame(mydata, fit.cluster = fit$cluster)

# The 5-cluster solution in `fit` is reused here on purpose. Re-running
# kmeans() on `mydata` at this point would cluster on the appended label
# column as well, and would replace `fit` with a different solution whose
# cluster numbers no longer match the cluster means computed from `fit`.

# Cluster Plot against 1st 2 principal components

# vary parameters for most readable graph
library(cluster)
clusplot(scaled_features, fit$cluster, color = TRUE, shade = TRUE,
         labels = 2, lines = 0)

## Explanation of Clusters

Cluster 1 - They are average in everything. I call them ‘Average Joes’. Cluster 2 - They finish work on time, have good interpersonal skills, and are multitaskers. Their salaries are high. I call them ‘Superstars’. Cluster 3 - They attend all meetings and work well with cross-functional teams, but their salaries are lower than those in Cluster 2. I call them ‘Hard Workers’. Cluster 4 - They don’t finish work on time, go by the book, and don’t work well with cross-functional teams; their salaries are the lowest. I call them ‘Laggards’. Cluster 5 - They don’t finish work on time and are low on independence; their salaries are the same as those in Cluster 3.