title: “NCB Project Analysis” author: “Jitender Singh, Srija Sama, Sanjani Ksheerasagar” date: “2025-11-30” output: word_document: default html_document: default —
library(readxl)
library(dplyr)
library(ggplot2)
library(corrplot)
library(cluster)
library(tidyr)
data <- read_excel("C:/Users/jithu/Downloads/W07404-XLS-ENG.xlsx", sheet = "data")
data$bloc <- factor(data$bloc, labels = c("Non-Metro", "Metro"))
data$ccon <- factor(data$ccon, labels = c("Low", "High"))
summary(data)
## bnum badd bloc bsize
## Min. : 1.00 Length:128 Non-Metro:43 Min. : 5.00
## 1st Qu.: 32.75 Class :character Metro :85 1st Qu.:11.00
## Median : 64.50 Mode :character Median :16.00
## Mean : 64.50 Mean :17.42
## 3rd Qu.: 96.25 3rd Qu.:21.25
## Max. :128.00 Max. :55.00
## ecuso equal einvol etra
## Min. :3.030 Min. :2.610 Min. :2.740 Min. :2.510
## 1st Qu.:3.520 1st Qu.:3.405 1st Qu.:3.260 1st Qu.:3.107
## Median :3.750 Median :3.590 Median :3.430 Median :3.260
## Mean :3.758 Mean :3.589 Mean :3.441 Mean :3.258
## 3rd Qu.:3.973 3rd Qu.:3.775 3rd Qu.:3.650 3rd Qu.:3.420
## Max. :4.700 Max. :4.300 Max. :4.110 Max. :4.030
## ecomm eteam eeng eitl
## Min. :2.180 Min. :2.600 Min. :2.500 Min. :1.400
## 1st Qu.:3.050 1st Qu.:3.390 1st Qu.:3.083 1st Qu.:2.138
## Median :3.220 Median :3.580 Median :3.270 Median :2.330
## Mean :3.238 Mean :3.543 Mean :3.275 Mean :2.422
## 3rd Qu.:3.393 3rd Qu.:3.720 3rd Qu.:3.440 3rd Qu.:2.715
## Max. :3.900 Max. :4.050 Max. :4.400 Max. :3.650
## eben cserq cbrtel cbr
## Min. :2.830 Min. :2.850 Min. :3.910 Min. :3.710
## 1st Qu.:3.550 1st Qu.:3.200 1st Qu.:4.160 1st Qu.:4.030
## Median :3.790 Median :3.315 Median :4.260 Median :4.180
## Mean :3.745 Mean :3.336 Mean :4.268 Mean :4.179
## 3rd Qu.:3.922 3rd Qu.:3.440 3rd Qu.:4.372 3rd Qu.:4.310
## Max. :4.400 Max. :3.910 Max. :4.600 Max. :4.660
## cbrpb cloy ccon teltr prod
## Min. :3.770 Min. :3.05 Low :64 Min. :1660 Min. :166.0
## 1st Qu.:4.050 1st Qu.:3.65 High:64 1st Qu.:3521 1st Qu.:252.5
## Median :4.130 Median :3.77 Median :4046 Median :292.0
## Mean :4.144 Mean :3.76 Mean :3977 Mean :290.5
## 3rd Qu.:4.242 3rd Qu.:3.86 3rd Qu.:4375 3rd Qu.:324.2
## Max. :4.530 Max. :4.22 Max. :5637 Max. :441.0
data %>%
select(eeng, etra, ecomm, eteam, equal, ecuso) %>%
pivot_longer(cols = everything(), names_to = "Variable", values_to = "Score") %>%
ggplot(aes(x = Score, fill = Variable)) +
geom_histogram(bins = 10, alpha = 0.6) +
theme_minimal() +
labs(title = "Distribution of Employee Engagement Metrics")
corr_vars <- c("ecuso","equal","einvol","etra","ecomm","eteam","eeng",
"cserq","cbrtel","cbr","cbrpb","cloy","teltr","prod")
corr_matrix <- cor(data[, corr_vars], use = "complete.obs")
corrplot(corr_matrix, method = "color", type = "upper")
model_prod <- lm(prod ~ eeng + etra + ecomm + eteam + cserq + cloy, data = data)
summary(model_prod)
##
## Call:
## lm(formula = prod ~ eeng + etra + ecomm + eteam + cserq + cloy,
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -113.087 -37.717 -2.675 31.589 149.468
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 84.582 111.731 0.757 0.4505
## eeng -8.302 18.075 -0.459 0.6468
## etra -22.639 27.620 -0.820 0.4140
## ecomm 41.894 24.855 1.686 0.0945 .
## eteam 14.808 22.343 0.663 0.5087
## cserq -59.619 59.129 -1.008 0.3153
## cloy 84.489 57.412 1.472 0.1437
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.73 on 121 degrees of freedom
## Multiple R-squared: 0.06078, Adjusted R-squared: 0.0142
## F-statistic: 1.305 on 6 and 121 DF, p-value: 0.26
cluster_vars <- data[, c("ecuso","equal","etra","ecomm","eteam","eeng","cloy","prod")]
cl_scaled <- scale(cluster_vars)
set.seed(123)
k3 <- kmeans(cl_scaled, centers = 3)
data$cluster <- as.factor(k3$cluster)
ggplot(data, aes(x = eeng, y = cloy, color = cluster)) +
geom_point(size = 3) +
theme_minimal() +
labs(title = "Cluster Analysis")
write.csv(data, “C:/Users/jithu/Downloads/NCB_cleaned_clusters.csv”, row.names = FALSE)