02/09/2025
We are simulating the act of collecting data .Each sub-sample should be as long as roughly 50% percent of your data. Store each sample set in a separate data frame (e.g., df_2 might be the second of these samples). Of course, these sub-samples should each include both categorical and continuous (numeric) data
data <- read.csv("C:/Users/rbada/Downloads/productivity+prediction+of+garment+employees/garments_worker_productivity.csv")
### Create 5 Random Sub-Samples (with Replacement)
set.seed(123)
num_samples <- 5
sample_size <- round(0.5 * nrow(data))
sample_data_frames <- list()
for (i in 1:num_samples) {
sample_df <- data[sample(1:nrow(data), size = sample_size, replace = TRUE), ]
assign(paste0("df_", i), sample_df)
sample_data_frames[[paste0("sample_", i)]] <- sample_df
}
for (i in 1:num_samples) {
cat("Number of rows in df_", i, ": ", nrow(get(paste0("df_", i))), "\n")
print(head(get(paste0("df_", i))))
}
## Number of rows in df_ 1 : 598
## date quarter department day team targeted_productivity smv
## 415 1/24/2015 Quarter4 sweing Saturday 8 0.35 15.09
## 463 1/27/2015 Quarter4 finishing Tuesday 3 0.75 3.94
## 179 1/11/2015 Quarter2 finishing Sunday 10 0.80 3.94
## 526 1/31/2015 Quarter5 finishing Saturday 9 0.75 3.94
## 195 1/12/2015 Quarter2 finishing Monday 4 0.35 4.30
## 938 2/25/2015 Quarter4 finishing Wednesday 8 0.70 4.60
## wip over_time incentive idle_time idle_men no_of_style_change
## 415 1448 9360 23 0 0 0
## 463 NA 1440 0 0 0 0
## 179 NA 1440 0 0 0 0
## 526 NA 240 0 0 0 0
## 195 NA 3240 0 0 0 0
## 938 NA 960 0 0 0 0
## no_of_workers actual_productivity
## 415 52 0.3499895
## 463 12 0.8618750
## 179 8 0.8282955
## 526 2 0.9718667
## 195 18 0.9422138
## 938 8 0.6585417
## Number of rows in df_ 2 : 598
## date quarter department day team targeted_productivity smv
## 753 2/14/2015 Quarter2 finishing Saturday 12 0.80 4.08
## 398 1/24/2015 Quarter4 sweing Saturday 4 0.75 22.52
## 1179 3/11/2015 Quarter2 sweing Wednesday 12 0.80 15.26
## 818 2/17/2015 Quarter3 sweing Tuesday 9 0.65 18.79
## 786 2/16/2015 Quarter3 sweing Monday 1 0.80 22.52
## 660 2/8/2015 Quarter2 sweing Sunday 3 0.80 22.52
## wip over_time incentive idle_time idle_men no_of_style_change
## 753 NA 1080 0 0 0 0
## 398 727 10260 94 0 0 0
## 1179 470 4080 63 0 0 0
## 818 2120 5520 0 0 0 1
## 786 1422 6840 113 0 0 0
## 660 1283 6720 88 0 0 0
## no_of_workers actual_productivity
## 753 9 0.8008889
## 398 57 0.9003211
## 1179 34 0.8004020
## 818 51 0.6501340
## 786 57 1.0002304
## 660 56 0.9001298
## Number of rows in df_ 3 : 598
## date quarter department day team targeted_productivity smv
## 674 2/8/2015 Quarter2 sweing Sunday 8 0.70 24.26
## 719 2/11/2015 Quarter2 sweing Wednesday 10 0.80 22.52
## 837 2/18/2015 Quarter3 finishing Wednesday 7 0.70 5.13
## 183 1/11/2015 Quarter2 sweing Sunday 2 0.80 28.08
## 465 1/27/2015 Quarter4 finishing Tuesday 8 0.65 3.94
## 753 2/14/2015 Quarter2 finishing Saturday 12 0.80 4.08
## wip over_time incentive idle_time idle_men no_of_style_change no_of_workers
## 674 154 6840 0 0 0 0 57.0
## 719 598 0 75 0 0 0 56.0
## 837 NA 960 0 0 0 0 8.0
## 183 805 10530 63 0 0 0 58.5
## 465 NA 960 0 0 0 0 8.0
## 753 NA 1080 0 0 0 0 9.0
## actual_productivity
## 674 0.3532596
## 719 0.8503646
## 837 0.6718750
## 183 0.8000000
## 465 0.8454583
## 753 0.8008889
## Number of rows in df_ 4 : 598
## date quarter department day team targeted_productivity smv
## 122 1/7/2015 Quarter1 sweing Wednesday 5 0.70 21.98
## 471 1/27/2015 Quarter4 sweing Tuesday 9 0.70 29.12
## 215 1/12/2015 Quarter2 sweing Monday 4 0.35 22.40
## 489 1/28/2015 Quarter4 sweing Wednesday 9 0.70 29.12
## 532 1/31/2015 Quarter5 finishing Saturday 6 0.60 3.94
## 522 1/31/2015 Quarter5 finishing Saturday 3 0.80 3.94
## wip over_time incentive idle_time idle_men no_of_style_change
## 122 413 9720 40 0 0 0
## 471 1294 6960 50 0 0 0
## 215 581 7350 0 0 0 0
## 489 1340 6960 63 0 0 0
## 532 NA 1200 0 0 0 0
## 522 NA 960 0 0 0 0
## no_of_workers actual_productivity
## 122 58.0 0.7004808
## 471 58.0 0.7003862
## 215 51.5 0.3506330
## 489 58.0 0.7505931
## 532 10.0 0.9718667
## 522 8.0 0.9718667
## Number of rows in df_ 5 : 598
## date quarter department day team targeted_productivity smv
## 209 1/12/2015 Quarter2 finishing Monday 8 0.8 2.90
## 529 1/31/2015 Quarter5 finishing Saturday 7 0.7 3.94
## 483 1/28/2015 Quarter4 sweing Wednesday 10 0.8 22.52
## 1130 3/9/2015 Quarter2 finishing Monday 12 0.8 4.60
## 1042 3/3/2015 Quarter1 finishing Tuesday 1 0.7 3.94
## 246 1/14/2015 Quarter2 sweing Wednesday 8 0.8 25.90
## wip over_time incentive idle_time idle_men no_of_style_change
## 209 NA 1440 0 0 0 0
## 529 NA 1200 0 0 0 0
## 483 1175 6720 60 0 0 0
## 1130 NA 0 1080 0 0 0
## 1042 NA 3360 0 0 0 0
## 246 1218 10170 60 0 0 0
## no_of_workers actual_productivity
## 209 8.0 0.7250000
## 529 10.0 0.9718667
## 483 56.0 0.8505321
## 1130 9.0 0.9029630
## 1042 8.0 0.5554306
## 246 56.5 0.8501368
for (i in 1:num_samples) {
cat("\nSummary of df_", i, ":\n")
print(summary(get(paste0("df_", i))))
}
##
## Summary of df_ 1 :
## date quarter department day
## Length:598 Length:598 Length:598 Length:598
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## team targeted_productivity smv wip
## Min. : 1.000 Min. :0.3500 Min. : 2.90 Min. : 11
## 1st Qu.: 3.000 1st Qu.:0.7000 1st Qu.: 3.94 1st Qu.: 783
## Median : 6.000 Median :0.7500 Median :15.26 Median : 1054
## Mean : 6.283 Mean :0.7242 Mean :15.15 Mean : 1196
## 3rd Qu.: 9.000 3rd Qu.:0.8000 3rd Qu.:24.26 3rd Qu.: 1274
## Max. :12.000 Max. :0.8000 Max. :51.02 Max. :21540
## NA's :243
## over_time incentive idle_time idle_men
## Min. : 0 Min. : 0.00 Min. : 0.0000 Min. : 0.0000
## 1st Qu.: 1440 1st Qu.: 0.00 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median : 4080 Median : 23.00 Median : 0.0000 Median : 0.0000
## Mean : 4609 Mean : 43.33 Mean : 0.5627 Mean : 0.3428
## 3rd Qu.: 6960 3rd Qu.: 50.00 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :15120 Max. :3600.00 Max. :150.0000 Max. :35.0000
##
## no_of_style_change no_of_workers actual_productivity
## Min. :0.0000 Min. : 2.00 Min. :0.2473
## 1st Qu.:0.0000 1st Qu.:10.00 1st Qu.:0.6503
## Median :0.0000 Median :45.00 Median :0.7842
## Mean :0.1421 Mean :35.68 Mean :0.7402
## 3rd Qu.:0.0000 3rd Qu.:57.00 3rd Qu.:0.8506
## Max. :2.0000 Max. :60.00 Max. :1.1204
##
##
## Summary of df_ 2 :
## date quarter department day
## Length:598 Length:598 Length:598 Length:598
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## team targeted_productivity smv wip
## Min. : 1.000 Min. :0.3500 Min. : 2.90 Min. : 7.0
## 1st Qu.: 3.000 1st Qu.:0.7000 1st Qu.: 3.94 1st Qu.: 723.5
## Median : 6.000 Median :0.7500 Median :15.26 Median : 980.0
## Mean : 6.319 Mean :0.7355 Mean :15.21 Mean : 965.3
## 3rd Qu.: 9.000 3rd Qu.:0.8000 3rd Qu.:24.26 3rd Qu.:1222.5
## Max. :12.000 Max. :0.8000 Max. :51.02 Max. :2120.0
## NA's :247
## over_time incentive idle_time idle_men
## Min. : 0 Min. : 0.00 Min. : 0.0000 Min. : 0.0000
## 1st Qu.: 1440 1st Qu.: 0.00 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median : 4080 Median : 23.00 Median : 0.0000 Median : 0.0000
## Mean : 4560 Mean : 34.14 Mean : 0.9281 Mean : 0.2759
## 3rd Qu.: 6960 3rd Qu.: 50.00 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :15000 Max. :1200.00 Max. :270.0000 Max. :45.0000
##
## no_of_style_change no_of_workers actual_productivity
## Min. :0.0000 Min. : 2.00 Min. :0.2337
## 1st Qu.:0.0000 1st Qu.: 8.00 1st Qu.:0.6502
## Median :0.0000 Median :34.00 Median :0.7667
## Mean :0.1304 Mean :34.64 Mean :0.7326
## 3rd Qu.:0.0000 3rd Qu.:57.00 3rd Qu.:0.8343
## Max. :2.0000 Max. :60.00 Max. :1.1204
##
##
## Summary of df_ 3 :
## date quarter department day
## Length:598 Length:598 Length:598 Length:598
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## team targeted_productivity smv wip
## Min. : 1.000 Min. :0.0700 Min. : 2.90 Min. : 7
## 1st Qu.: 4.000 1st Qu.:0.7000 1st Qu.: 3.94 1st Qu.: 730
## Median : 6.000 Median :0.7500 Median :12.52 Median : 983
## Mean : 6.368 Mean :0.7238 Mean :14.90 Mean : 1043
## 3rd Qu.: 9.000 3rd Qu.:0.8000 3rd Qu.:24.26 3rd Qu.: 1194
## Max. :12.000 Max. :0.8000 Max. :50.89 Max. :12261
## NA's :263
## over_time incentive idle_time idle_men
## Min. : 0 Min. : 0.00 Min. : 0.000 Min. : 0.0000
## 1st Qu.: 1440 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.0000
## Median : 3840 Median : 0.00 Median : 0.000 Median : 0.0000
## Mean : 4469 Mean : 28.74 Mean : 1.676 Mean : 0.8478
## 3rd Qu.: 6840 3rd Qu.: 50.00 3rd Qu.: 0.000 3rd Qu.: 0.0000
## Max. :25920 Max. :960.00 Max. :300.000 Max. :45.0000
##
## no_of_style_change no_of_workers actual_productivity
## Min. :0.0000 Min. : 2.00 Min. :0.2358
## 1st Qu.:0.0000 1st Qu.: 8.00 1st Qu.:0.6316
## Median :0.0000 Median :34.00 Median :0.7552
## Mean :0.1555 Mean :33.93 Mean :0.7239
## 3rd Qu.:0.0000 3rd Qu.:57.00 3rd Qu.:0.8501
## Max. :2.0000 Max. :60.00 Max. :1.1005
##
##
## Summary of df_ 4 :
## date quarter department day
## Length:598 Length:598 Length:598 Length:598
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## team targeted_productivity smv wip
## Min. : 1.000 Min. :0.0700 Min. : 2.90 Min. : 7.0
## 1st Qu.: 4.000 1st Qu.:0.7000 1st Qu.: 3.94 1st Qu.: 810.5
## Median : 7.000 Median :0.7500 Median :14.89 Median : 1035.0
## Mean : 6.674 Mean :0.7286 Mean :14.55 Mean : 1134.1
## 3rd Qu.: 9.750 3rd Qu.:0.8000 3rd Qu.:22.94 3rd Qu.: 1216.5
## Max. :12.000 Max. :0.8000 Max. :54.56 Max. :23122.0
## NA's :267
## over_time incentive idle_time idle_men
## Min. : 0 Min. : 0.00 Min. :0.00000 Min. : 0.0000
## 1st Qu.: 1440 1st Qu.: 0.00 1st Qu.:0.00000 1st Qu.: 0.0000
## Median : 3960 Median : 0.00 Median :0.00000 Median : 0.0000
## Mean : 4594 Mean : 39.62 Mean :0.04599 Mean : 0.2508
## 3rd Qu.: 6960 3rd Qu.: 50.00 3rd Qu.:0.00000 3rd Qu.: 0.0000
## Max. :25920 Max. :3600.00 Max. :8.00000 Max. :40.0000
##
## no_of_style_change no_of_workers actual_productivity
## Min. :0.0000 Min. : 2.00 Min. :0.2337
## 1st Qu.:0.0000 1st Qu.: 9.00 1st Qu.:0.6517
## Median :0.0000 Median :34.00 Median :0.7592
## Mean :0.1321 Mean :33.47 Mean :0.7366
## 3rd Qu.:0.0000 3rd Qu.:57.00 3rd Qu.:0.8501
## Max. :2.0000 Max. :60.00 Max. :1.0507
##
##
## Summary of df_ 5 :
## date quarter department day
## Length:598 Length:598 Length:598 Length:598
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## team targeted_productivity smv wip
## Min. : 1.000 Min. :0.0700 Min. : 2.90 Min. : 7.0
## 1st Qu.: 4.000 1st Qu.:0.7000 1st Qu.: 4.15 1st Qu.: 832.5
## Median : 7.000 Median :0.7500 Median :15.26 Median : 1054.0
## Mean : 6.761 Mean :0.7266 Mean :15.52 Mean : 1047.5
## 3rd Qu.:10.000 3rd Qu.:0.8000 3rd Qu.:25.90 3rd Qu.: 1239.8
## Max. :12.000 Max. :0.8000 Max. :54.56 Max. :12261.0
## NA's :240
## over_time incentive idle_time idle_men
## Min. : 0 Min. : 0.00 Min. :0.00000 Min. : 0.0000
## 1st Qu.: 1440 1st Qu.: 0.00 1st Qu.:0.00000 1st Qu.: 0.0000
## Median : 4080 Median : 23.00 Median :0.00000 Median : 0.0000
## Mean : 4469 Mean : 37.58 Mean :0.05017 Mean : 0.2926
## 3rd Qu.: 6840 3rd Qu.: 50.00 3rd Qu.:0.00000 3rd Qu.: 0.0000
## Max. :15120 Max. :2880.00 Max. :8.00000 Max. :40.0000
##
## no_of_style_change no_of_workers actual_productivity
## Min. :0.000 Min. : 2.00 Min. :0.2358
## 1st Qu.:0.000 1st Qu.: 9.00 1st Qu.:0.6286
## Median :0.000 Median :34.00 Median :0.7506
## Mean :0.194 Mean :35.15 Mean :0.7222
## 3rd Qu.:0.000 3rd Qu.:57.00 3rd Qu.:0.8502
## Max. :2.000 Max. :60.00 Max. :1.1005
##
The results show that productivity is stable, with most workers meeting expected performance levels. The average productivity across the sub-samples less than 1, with no major issues. However, there are some differences that need further investigation:
1-Idle time is higher in df_2 compared to the other samples, which could mean that some teams or departments experienced delays or downtime during production.
2-Overtime changes slightly between samples.This might be due to some teams working longer hours because of different workloads or delays.
3-Work-in-progress (WIP) is generally steady, but large increase in some samples suggest possible backlogs or delays in production.
We need to check which departments or teams are causing the differences in overtime and idle time. The next step is to group the data by department and calculate Z-scores to identify any anomalies.
###Scrutinize these sub-samples. Note: you might find group_by quite helpful here
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Group by department and summarize metrics for all sub-samples
for (i in 1:num_samples) {
cat("\nGrouped summary for df_", i, " (by department):\n")
group_summary <- get(paste0("df_", i)) %>%
group_by(department) %>%
summarize(
mean_productivity = mean(actual_productivity, na.rm = TRUE),
median_productivity = median(actual_productivity, na.rm = TRUE),
mean_overtime = mean(over_time, na.rm = TRUE),
median_overtime = median(over_time, na.rm = TRUE),
mean_idle_time = mean(idle_time, na.rm = TRUE),
median_idle_time = median(idle_time, na.rm = TRUE)
)
print(group_summary)
}
##
## Grouped summary for df_ 1 (by department):
## # A tibble: 3 × 7
## department mean_productivity median_productivity mean_overtime median_overtime
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 "finishin… 0.719 0.755 1727. 1080
## 2 "finishin… 0.802 0.865 2060 1440
## 3 "sweing" 0.728 0.751 6475. 6840
## # ℹ 2 more variables: mean_idle_time <dbl>, median_idle_time <dbl>
##
## Grouped summary for df_ 2 (by department):
## # A tibble: 3 × 7
## department mean_productivity median_productivity mean_overtime median_overtime
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 "finishin… 0.700 0.721 1626. 1080
## 2 "finishin… 0.766 0.821 2114. 1440
## 3 "sweing" 0.732 0.751 6450. 6840
## # ℹ 2 more variables: mean_idle_time <dbl>, median_idle_time <dbl>
##
## Grouped summary for df_ 3 (by department):
## # A tibble: 3 × 7
## department mean_productivity median_productivity mean_overtime median_overtime
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 "finishin… 0.740 0.772 1734. 1080
## 2 "finishin… 0.775 0.821 1873. 1440
## 3 "sweing" 0.698 0.750 6564. 6840
## # ℹ 2 more variables: mean_idle_time <dbl>, median_idle_time <dbl>
##
## Grouped summary for df_ 4 (by department):
## # A tibble: 3 × 7
## department mean_productivity median_productivity mean_overtime median_overtime
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 "finishin… 0.699 0.727 1955. 1080
## 2 "finishin… 0.811 0.828 1918. 1440
## 3 "sweing" 0.725 0.751 6737. 6840
## # ℹ 2 more variables: mean_idle_time <dbl>, median_idle_time <dbl>
##
## Grouped summary for df_ 5 (by department):
## # A tibble: 3 × 7
## department mean_productivity median_productivity mean_overtime median_overtime
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 "finishin… 0.702 0.749 1833. 1200
## 2 "finishin… 0.771 0.821 1706. 1440
## 3 "sweing" 0.715 0.750 6274. 6840
## # ℹ 2 more variables: mean_idle_time <dbl>, median_idle_time <dbl>
The grouped summaries show that productivity, overtime, and idle time behave differently across departments:
1-The productivity across the sub-samples is quite consistent, with df_1 having the highest average productivity at 0.7402 and df_3 having the lowest at 0.7239. The differences in productivity between these sub-samples are relatively small (ranging from 0.7239 to 0.7402), which suggests that the overall performance of the team or department remains stable across the sub-samples.
1-Overtime: The sewing department consistently works longer overtime (between 0 to and 25920 minutes) compared to the finishing department (around 0 to 10500 minutes). This large difference could mean that sewing teams are struggling with heavier workloads or delays.
3-In all sub-samples, the sewing department shows idle time increase ranging from 0 to 300 minutes, indicating inefficiencies. To improve, they should focus on better workflow management and task distribution. The finishing department, on the other hand, has no idle time, indicating smooth and efficient production. ### How Different Are They?
The sub-samples show some differences, especially when comparing productivity, overtime, and idle time across departments:
1-Productivity: There are small differences across sub-samples, but productivity is generally stable, with finishing having higher productivity compared to sewing.
2-Overtime: The sewing department consistently has much higher overtime (about 6,474 to 6,840 minutes) compared to finishing .However, the sewing department experienced a temporary increase in idle time (2.99 minutes in df_3), compared to much lower idle times in the other sub-samples
3-Idle Time:The finishing department shows almost no idle time in any sub-sample, indicating smooth production However, the sewing department experiences occasional increase, with df_3 showing an idle time of 2.99 minutes—an anomaly compared to other sub-samples where idle time is much lower.
1-Idle Time in Sewing (df_3): In df_3, the sewing department has an unusually high mean idle time of 2.99 minutes, compared to close to 0 in other sub-samples. This would be considered an anomaly in df_3 but not in sub-samples where idle time is consistently low.
2-Overtime The finishing department’s overtime stays consistently low across all sub-samples, showing well-managed workloads. However, the sewing department consistently has high overtime (between 6,564 and 6840 minutes), which could be due to extra workloads or delays in production that need further investigation.
1-High Finishing Productivity: The finishing department consistently shows high productivity across all sub-samples, indicating steady performance. Sewing
2-Overtime: The sewing department consistently works longer overtime in all sub-samples, suggesting a systemic workload or process-related issue.
Identify Consistent Patterns: The consistent high productivity of the finishing department suggests it is a reliable and efficient area of production.
Systemic Issues:The consistent overtime and occasional increases in idle time in the sewing department indicate possible delays or inefficiencies in the production process that require further investigation.
Random vs. Systemic Variations:While some variations, such as the high idle time in df_3, may be temporary, consistently high overtime across sub-samples points to a systemic workload issue that should be addressed.
# Combine all sub-samples into one data frame
combined_df <- do.call(rbind, lapply(1:num_samples, function(i) get(paste0("df_", i))))
str(combined_df)
## 'data.frame': 2990 obs. of 15 variables:
## $ date : chr "1/24/2015" "1/27/2015" "1/11/2015" "1/31/2015" ...
## $ quarter : chr "Quarter4" "Quarter4" "Quarter2" "Quarter5" ...
## $ department : chr "sweing" "finishing " "finishing " "finishing " ...
## $ day : chr "Saturday" "Tuesday" "Sunday" "Saturday" ...
## $ team : int 8 3 10 9 4 8 2 5 12 10 ...
## $ targeted_productivity: num 0.35 0.75 0.8 0.75 0.35 0.7 0.7 0.7 0.8 0.8 ...
## $ smv : num 15.09 3.94 3.94 3.94 4.3 ...
## $ wip : int 1448 NA NA NA NA NA 817 573 1026 1108 ...
## $ over_time : int 9360 1440 1440 240 3240 960 5520 6840 2880 6720 ...
## $ incentive : int 23 0 0 0 0 0 30 30 63 113 ...
## $ idle_time : num 0 0 0 0 0 0 0 0 0 0 ...
## $ idle_men : int 0 0 0 0 0 0 0 0 0 0 ...
## $ no_of_style_change : int 0 0 0 0 0 0 1 0 0 0 ...
## $ no_of_workers : num 52 12 8 2 18 8 45 57 34 56 ...
## $ actual_productivity : num 0.35 0.862 0.828 0.972 0.942 ...
# Calculate overall means and standard deviations
overall_mean_productivity <- mean(combined_df$actual_productivity, na.rm = TRUE)
overall_sd_productivity <- sd(combined_df$actual_productivity, na.rm = TRUE)
overall_mean_overtime <- mean(combined_df$over_time, na.rm = TRUE)
overall_sd_overtime <- sd(combined_df$over_time, na.rm = TRUE)
overall_mean_idle_time <- mean(combined_df$idle_time, na.rm = TRUE)
overall_sd_idle_time <- sd(combined_df$idle_time, na.rm = TRUE)
cat("\nOverall Summary Statistics:\n")
##
## Overall Summary Statistics:
cat("Mean Productivity: ", overall_mean_productivity, "\n")
## Mean Productivity: 0.7311311
cat("SD Productivity: ", overall_sd_productivity, "\n\n")
## SD Productivity: 0.1769174
cat("Mean Overtime: ", overall_mean_overtime, "\n")
## Mean Overtime: 4540.161
cat("SD Overtime: ", overall_sd_overtime, "\n\n")
## SD Overtime: 3350.28
cat("Mean Idle Time: ", overall_mean_idle_time, "\n")
## Mean Idle Time: 0.6525084
cat("SD Idle Time: ", overall_sd_idle_time, "\n")
## SD Idle Time: 12.05551
for (i in 1:num_samples) {
cat("\nZ-Scores for df_", i, ":\n")
sub_sample <- get(paste0("df_", i))
z_scores <- sub_sample %>%
group_by(department) %>%
summarize(
z_productivity = (mean(actual_productivity, na.rm = TRUE) - overall_mean_productivity) / overall_sd_productivity,
z_overtime = (mean(over_time, na.rm = TRUE) - overall_mean_overtime) / overall_sd_overtime,
z_idle_time = (mean(idle_time, na.rm = TRUE) - overall_mean_idle_time) / overall_sd_idle_time
)
print(z_scores)
}
##
## Z-Scores for df_ 1 :
## # A tibble: 3 × 4
## department z_productivity z_overtime z_idle_time
## <chr> <dbl> <dbl> <dbl>
## 1 "finishing" -0.0709 -0.840 -0.0541
## 2 "finishing " 0.399 -0.740 -0.0541
## 3 "sweing" -0.0156 0.577 0.0245
##
## Z-Scores for df_ 2 :
## # A tibble: 3 × 4
## department z_productivity z_overtime z_idle_time
## <chr> <dbl> <dbl> <dbl>
## 1 "finishing" -0.177 -0.870 -0.0541
## 2 "finishing " 0.195 -0.724 -0.0541
## 3 "sweing" 0.00663 0.570 0.0770
##
## Z-Scores for df_ 3 :
## # A tibble: 3 × 4
## department z_productivity z_overtime z_idle_time
## <chr> <dbl> <dbl> <dbl>
## 1 "finishing" 0.0501 -0.838 -0.0541
## 2 "finishing " 0.246 -0.796 -0.0541
## 3 "sweing" -0.186 0.604 0.194
##
## Z-Scores for df_ 4 :
## # A tibble: 3 × 4
## department z_productivity z_overtime z_idle_time
## <chr> <dbl> <dbl> <dbl>
## 1 "finishing" -0.183 -0.772 -0.0541
## 2 "finishing " 0.449 -0.783 -0.0541
## 3 "sweing" -0.0329 0.656 -0.0472
##
## Z-Scores for df_ 5 :
## # A tibble: 3 × 4
## department z_productivity z_overtime z_idle_time
## <chr> <dbl> <dbl> <dbl>
## 1 "finishing" -0.167 -0.808 -0.0541
## 2 "finishing " 0.228 -0.846 -0.0541
## 3 "sweing" -0.0898 0.518 -0.0472
After analyzing the summary statistics for each sub-sample (df_1 to df_5) and the overall data set, we calculated Z-scores to compare the performance of departments and detect any anomalies. The Z-scores highlight any values that deviate significantly from the overall mean, particularly for productivity, overtime, and idle time.
The results show that productivity in the finishing department only has minor variations across sub-samples, with no significant anomalies. The sewing department’s productivity remains stable and close to the overall mean, showing consistent performance.
However, overtime stands out as an area of concern. The sewing department consistently shows positive Z-scores between 0.5 and 0.6, meaning it regularly experiences higher-than-average overtime. This indicates a potential systemic issue, possibly due to workload or resource management challenges.
Idle time does not show any major concerns, as its Z-scores stay close to 0, suggesting no unexpected delays or downtime.
In conclusion, while productivity and idle time are well-managed, the consistently high overtime in the sewing department should be further investigated to understand its cause and implement solutions to improve overall efficiency.
identifying key factors that could be causing the high overtime in the sewing department by grouping the data and exploring relevant metrics like day, team, WIP, and quarter.
for (i in 1:num_samples) {
cat("\nAnalysis of Overtime Without Filtering (df_", i, "):\n")
overtime_analysis <- get(paste0("df_", i)) %>%
group_by(team, quarter, day) %>%
summarize(
mean_overtime = mean(over_time, na.rm = TRUE),
mean_productivity = mean(actual_productivity, na.rm = TRUE),
mean_idle_time = mean(idle_time, na.rm = TRUE)
)
print(overtime_analysis)
}
##
## Analysis of Overtime Without Filtering (df_ 1 ):
## `summarise()` has grouped output by 'team', 'quarter'. You can override using
## the `.groups` argument.
## # A tibble: 257 × 6
## # Groups: team, quarter [59]
## team quarter day mean_overtime mean_productivity mean_idle_time
## <int> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1 Quarter1 Monday 4680 0.652 0
## 2 1 Quarter1 Saturday 1040 0.892 0
## 3 1 Quarter1 Sunday 4950 0.850 0
## 4 1 Quarter1 Thursday 6154. 0.800 0
## 5 1 Quarter1 Tuesday 6960 0.801 0
## 6 1 Quarter1 Wednesday 4170 0.926 0
## 7 1 Quarter2 Monday 4110 0.823 0
## 8 1 Quarter2 Saturday 6030 0.798 0
## 9 1 Quarter2 Sunday 6960 0.850 0
## 10 1 Quarter2 Thursday 1440 0.948 0
## # ℹ 247 more rows
##
## Analysis of Overtime Without Filtering (df_ 2 ):
## `summarise()` has grouped output by 'team', 'quarter'. You can override using
## the `.groups` argument.
## # A tibble: 254 × 6
## # Groups: team, quarter [56]
## team quarter day mean_overtime mean_productivity mean_idle_time
## <int> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1 Quarter1 Monday 5480 0.702 0
## 2 1 Quarter1 Saturday 1080 0.767 0
## 3 1 Quarter1 Sunday 3210 0.725 0
## 4 1 Quarter1 Thursday 960 0.837 0
## 5 1 Quarter1 Tuesday 5050 0.833 0
## 6 1 Quarter1 Wednesday 4536 0.833 0
## 7 1 Quarter2 Monday 2520 0.928 0
## 8 1 Quarter2 Saturday 7272 0.879 0
## 9 1 Quarter2 Sunday 3060 0.888 0
## 10 1 Quarter2 Tuesday 480 0.891 0
## # ℹ 244 more rows
##
## Analysis of Overtime Without Filtering (df_ 3 ):
## `summarise()` has grouped output by 'team', 'quarter'. You can override using
## the `.groups` argument.
## # A tibble: 249 × 6
## # Groups: team, quarter [58]
## team quarter day mean_overtime mean_productivity mean_idle_time
## <int> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1 Quarter1 Monday 7080 0.800 0
## 2 1 Quarter1 Saturday 3750 0.758 0
## 3 1 Quarter1 Sunday 4950 0.850 0
## 4 1 Quarter1 Thursday 4020 0.883 0
## 5 1 Quarter1 Tuesday 8160 0.884 0
## 6 1 Quarter2 Monday 1296 0.880 0
## 7 1 Quarter2 Saturday 10620 0.800 0
## 8 1 Quarter2 Sunday 7245 0.873 0
## 9 1 Quarter2 Thursday 480 1.05 0
## 10 1 Quarter2 Tuesday 960 0.881 0
## # ℹ 239 more rows
##
## Analysis of Overtime Without Filtering (df_ 4 ):
## `summarise()` has grouped output by 'team', 'quarter'. You can override using
## the `.groups` argument.
## # A tibble: 266 × 6
## # Groups: team, quarter [60]
## team quarter day mean_overtime mean_productivity mean_idle_time
## <int> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1 Quarter1 Saturday 5550 0.804 0
## 2 1 Quarter1 Thursday 4170 0.923 0
## 3 1 Quarter1 Tuesday 5130 0.778 0
## 4 1 Quarter1 Wednesday 10620 0.851 0
## 5 1 Quarter2 Monday 720 0.875 0
## 6 1 Quarter2 Saturday 8352 0.920 0
## 7 1 Quarter2 Sunday 1080 0.892 0
## 8 1 Quarter2 Thursday 6030 0.758 0
## 9 1 Quarter2 Tuesday 3160 0.911 0
## 10 1 Quarter2 Wednesday 6030 0.846 0
## # ℹ 256 more rows
##
## Analysis of Overtime Without Filtering (df_ 5 ):
## `summarise()` has grouped output by 'team', 'quarter'. You can override using
## the `.groups` argument.
## # A tibble: 263 × 6
## # Groups: team, quarter [60]
## team quarter day mean_overtime mean_productivity mean_idle_time
## <int> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1 Quarter1 Monday 2820 0.521 0
## 2 1 Quarter1 Saturday 1200 0.870 0
## 3 1 Quarter1 Sunday 2124 0.846 0
## 4 1 Quarter1 Thursday 3930 0.782 0
## 5 1 Quarter1 Tuesday 3360 0.555 0
## 6 1 Quarter1 Wednesday 3930 0.854 0
## 7 1 Quarter2 Monday 5150 0.886 0
## 8 1 Quarter2 Sunday 960 0.891 0
## 9 1 Quarter2 Thursday 0 1.10 0
## 10 1 Quarter2 Tuesday 4020 0.868 0
## # ℹ 253 more rows
This analysis helps identify patterns or trends in overtime across different teams, days, or quarters, and can guide decisions on improving productivity or managing workloads. Further investigation may be needed to understand why certain teams or days show higher overtime and how to optimize resources better.
library(ggplot2)
# Bar plot of mean overtime by team
ggplot(overtime_analysis, aes(x = as.factor(team), y = mean_overtime)) +
geom_bar(stat = "identity", fill = "lightblue") +
labs(title = "Mean Overtime by Team", x = "Team", y = "Mean Overtime") +
theme_minimal()
The visualization shows that Team 4 has the highest average overtime compared to other teams, suggesting they may be experiencing a heavier workload or inefficiencies. Further investigation is needed to understand the causes and optimize their operations, possibly focusing on task dependencies, staffing, or scheduling improvements.
# Bar plot of mean overtime by quarter
ggplot(overtime_analysis, aes(x = quarter, y = mean_overtime)) +
geom_bar(stat = "identity", fill = "dark blue") +
labs(title = "Mean Overtime by Quarter", x = "Quarter", y = "Mean Overtime") +
theme_minimal()
The bar plot shows the mean overtime across different quarters. It highlights that Quarter 2 and Quarter 1 have the highest overtime, while Quarter 5 shows a significant drop in overtime hours. This suggests that overtime is most prevalent in the first half of the year, with a noticeable decrease in the last quarter. Further analysis is needed to explore the reasons behind these fluctuations in overtime across quarters.
# Bar plot of mean overtime by day
ggplot(overtime_analysis, aes(x = day, y = mean_overtime)) +
geom_bar(stat = "identity", fill = "lightcoral") +
labs(title = "Mean Overtime by Day", x = "Day", y = "Mean Overtime") +
theme_minimal()
The bar plot indicates that Saturday and Thursday have the highest mean overtime, which suggests that these days experience more workload or operational challenges. Focusing on these two days for investigation can help uncover the factors driving overtime. By analyzing team coordination, task dependencies, and resource allocation, strategies can be developed to manage workloads more effectively and reduce overtime on these high-overtime days.
# Investigate tasks and workload for high-overtime teams (4)
team_task_investigation <- combined_df %>%
filter(team %in% c(4)) %>%
group_by(team) %>%
summarize(
avg_overtime = mean(over_time, na.rm = TRUE),
avg_idle_time = mean(idle_time, na.rm = TRUE),
avg_productivity = mean(actual_productivity, na.rm = TRUE)
) %>%
arrange(desc(avg_overtime))
print(team_task_investigation)
## # A tibble: 1 × 4
## team avg_overtime avg_idle_time avg_productivity
## <int> <dbl> <dbl> <dbl>
## 1 4 5565. 1.03 0.780
Team 4 has significant overtime (5564.948) with moderate idle time (1.03) and similar productivity (0.7805). Despite the overtime, productivity remains consistent, suggesting potential issues with task distribution, resource allocation, or team coordination. Further investigation is needed to understand the cause of the overtime and explore solutions such as optimizing task distribution and improving scheduling to reduce overtime and increase efficiency.
overtime_summary <- combined_df %>%
filter(team %in% c(4)) %>%
group_by(team, day) %>%
summarize(
avg_overtime = mean(over_time, na.rm = TRUE),
avg_idle_time = mean(idle_time, na.rm = TRUE),
avg_productivity = mean(actual_productivity, na.rm = TRUE)
) %>%
arrange(desc(avg_overtime))
## `summarise()` has grouped output by 'team'. You can override using the
## `.groups` argument.
print(overtime_summary)
## # A tibble: 6 × 5
## # Groups: team [1]
## team day avg_overtime avg_idle_time avg_productivity
## <int> <chr> <dbl> <dbl> <dbl>
## 1 4 Saturday 6992. 0 0.801
## 2 4 Thursday 6720 0 0.764
## 3 4 Sunday 5948. 0 0.780
## 4 4 Monday 5164. 0 0.781
## 5 4 Tuesday 4360. 0 0.758
## 6 4 Wednesday 4268. 6 0.797
based on the investigation, it was found that Saturday and Thursday are the days with the highest overtime for Team 4, aligning with the previously identified trend for high overtime across teams. This further confirms that certain days of the week, like Thursday and Saturday, are more demanding for Team 4, contributing to the high overtime.
This investigation suggests that further analysis of task distribution, scheduling, and resource allocation on these specific days is necessary to understand the causes of high overtime and to identify strategies to reduce it while maintaining productivity.
workload_investigation <- combined_df %>%
filter(team %in% c(4)) %>%
group_by(team, department) %>%
summarize(
avg_overtime = mean(over_time, na.rm = TRUE),
avg_idle_time = mean(idle_time, na.rm = TRUE),
avg_productivity = mean(actual_productivity, na.rm = TRUE)
) %>%
arrange(desc(avg_overtime))
## `summarise()` has grouped output by 'team'. You can override using the
## `.groups` argument.
print(workload_investigation)
## # A tibble: 3 × 5
## # Groups: team [1]
## team department avg_overtime avg_idle_time avg_productivity
## <int> <chr> <dbl> <dbl> <dbl>
## 1 4 "sweing" 7684. 1.75 0.737
## 2 4 "finishing " 3570. 0 0.892
## 3 4 "finishing" 1335. 0 0.785
Team 4’s sewing department shows the highest average overtime (7,683.51) and some idle time (1.75), suggesting possible workload imbalances or inefficiencies. The finishing department has lower overtime and no idle time, with slightly higher productivity (0.7846 vs. 0.7376). Further analysis is needed to explore the causes of high overtime and idle time in the sewing department, such as task distribution and workflow inefficiencies. Addressing these could improve overall productivity and reduce overtime.
idle_time_investigation <- combined_df %>%
filter(team == 4, department == "sweing") %>%
group_by(team, day) %>%
summarize(
avg_idle_time = mean(idle_time, na.rm = TRUE),
avg_overtime = mean(over_time, na.rm = TRUE),
avg_productivity = mean(actual_productivity, na.rm = TRUE)
) %>%
arrange(desc(avg_idle_time))
## `summarise()` has grouped output by 'team'. You can override using the
## `.groups` argument.
print(idle_time_investigation)
## # A tibble: 6 × 5
## # Groups: team [1]
## team day avg_idle_time avg_overtime avg_productivity
## <int> <chr> <dbl> <dbl> <dbl>
## 1 4 Wednesday 10.3 5823. 0.756
## 2 4 Monday 0 7312. 0.727
## 3 4 Saturday 0 9787. 0.750
## 4 4 Sunday 0 7559. 0.723
## 5 4 Thursday 0 8287. 0.719
## 6 4 Tuesday 0 7281. 0.755
Wednesday has the highest idle time for Team 4, but it doesn’t directly correspond to the highest overtime because overtime is influenced by workload and task distribution. On Thursday and Saturday, the team likely had more demanding tasks, leading to higher overtime. Wednesday had idle time, but it didn’t require extra work hours, which shows that overtime isn’t always linked to idle time.
library(dplyr)
daily_workload_investigation <- combined_df %>%
filter(team %in% c(4)) %>%
group_by(day) %>%
summarize(
avg_overtime = mean(over_time, na.rm = TRUE),
avg_idle_time = mean(idle_time, na.rm = TRUE),
avg_productivity = mean(actual_productivity, na.rm = TRUE)
) %>%
arrange(desc(avg_overtime))
print(daily_workload_investigation)
## # A tibble: 6 × 4
## day avg_overtime avg_idle_time avg_productivity
## <chr> <dbl> <dbl> <dbl>
## 1 Saturday 6992. 0 0.801
## 2 Thursday 6720 0 0.764
## 3 Sunday 5948. 0 0.780
## 4 Monday 5164. 0 0.781
## 5 Tuesday 4360. 0 0.758
## 6 Wednesday 4268. 6 0.797
The analysis shows that Saturday and Thursday have the highest overtime for Team 4. This likely results from increased workload, higher idle time, and lower productivity. To reduce overtime, it’s important to focus on improving task distribution, reducing idle time, and boosting productivity, especially on these high-overtime days.
# Investigate Overtime by Task Dependencies and Team Coordination
task_dependency_investigation <- combined_df %>%
filter(team %in% c(4)) %>%
group_by(team, department) %>%
summarize(
avg_overtime = mean(over_time, na.rm = TRUE),
avg_productivity = mean(actual_productivity, na.rm = TRUE),
total_tasks = n()
) %>%
arrange(desc(avg_overtime))
## `summarise()` has grouped output by 'team'. You can override using the
## `.groups` argument.
print(task_dependency_investigation)
## # A tibble: 3 × 5
## # Groups: team [1]
## team department avg_overtime avg_productivity total_tasks
## <int> <chr> <dbl> <dbl> <int>
## 1 4 "sweing" 7684. 0.737 171
## 2 4 "finishing " 3570. 0.892 65
## 3 4 "finishing" 1335. 0.785 55
The analysis show that the increase in overtime for Team 4 may be due to the imbalance in task distribution between departments, particularly the Sewing department. The sewing department handled a significantly higher number of tasks (171) compared to the finishing department (55), which could be putting extra pressure on the sewing team, leading to higher overtime hours. Further investigation is needed to determine if other factors, such as task complexity, inefficiencies, or staffing issues in the sewing department, are contributing to the increase in overtime. Additionally, understanding the coordination between the two departments could help in optimizing workload distribution and reducing overtime.
# Investigate overtime by department and team
team_comparison_investigation <- combined_df %>%
group_by(department, team) %>%
summarize(
avg_no_of_workers = mean(no_of_workers, na.rm = TRUE),
avg_overtime = mean(over_time, na.rm = TRUE),
avg_idle_time = mean(idle_time, na.rm = TRUE),
avg_productivity = mean(actual_productivity, na.rm = TRUE)
) %>%
arrange(desc(avg_overtime))
## `summarise()` has grouped output by 'department'. You can override using the
## `.groups` argument.
print(team_comparison_investigation)
## # A tibble: 36 × 6
## # Groups: department [3]
## department team avg_no_of_workers avg_overtime avg_idle_time
## <chr> <int> <dbl> <dbl> <dbl>
## 1 sweing 4 57.4 7684. 1.75
## 2 sweing 5 56.8 7406. 0.732
## 3 sweing 3 56.8 7016. 0
## 4 sweing 1 57.5 7000. 0
## 5 sweing 9 55.8 6928. 0
## 6 sweing 8 56.6 6900. 2.11
## 7 sweing 7 57.5 6845. 8.62
## 8 sweing 2 55.9 6817. 0.210
## 9 sweing 10 54.1 6702. 0.397
## 10 sweing 11 54.3 5817. 0.156
## # ℹ 26 more rows
## # ℹ 1 more variable: avg_productivity <dbl>
Team 4 in the sewing department has high overtime and idle time, but its productivity is not the highest. This means that despite working longer hours, the team is not accomplishing more during that time. The high idle time indicates that workers are available but not always fully engaged, which reduces productivity. Since productivity is not higher, Team 4 has to work more overtime to compensate for the imbalance and meet production goals. This suggests that extra hours are being spent to make up for inefficiencies or delays, as seen in the higher overtime and idle time.
# Analyze dependencies by grouping tasks and idle times
task_dependency_investigation <- combined_df %>%
group_by(team, department) %>%
summarize(
total_tasks = n(),
avg_idle_time = mean(idle_time, na.rm = TRUE),
avg_overtime = mean(over_time, na.rm = TRUE)
) %>%
arrange(desc(avg_idle_time))
## `summarise()` has grouped output by 'team'. You can override using the
## `.groups` argument.
print(task_dependency_investigation)
## # A tibble: 36 × 5
## # Groups: team [12]
## team department total_tasks avg_idle_time avg_overtime
## <int> <chr> <int> <dbl> <dbl>
## 1 7 "sweing" 129 8.62 6845.
## 2 8 "sweing" 160 2.11 6900.
## 3 4 "sweing" 171 1.75 7684.
## 4 5 "sweing" 140 0.732 7406.
## 5 10 "sweing" 121 0.397 6702.
## 6 2 "sweing" 124 0.210 6817.
## 7 11 "sweing" 154 0.156 5817.
## 8 1 "finishing" 56 0 1611.
## 9 1 "finishing " 57 0 1985.
## 10 1 "sweing" 135 0 7000.
## # ℹ 26 more rows
The analysis shows that the higher overtime for Team 4 is driven by two main factors:
Higher Number of Tasks: Team 4 in the sewing department is handling a higher number of tasks (171) compared to other teams. This heavier workload can lead to increased overtime to meet production targets.
Higher Idle Time: Despite having many tasks, Team 4 experiences higher idle time (1.75 hours). This suggests that workers are available but not always fully utilized, creating inefficiencies that further require overtime to complete tasks.
Addressing these two issues—optimizing task distribution and reducing idle time—could help
# Analyze team coordination and resource allocation
coordination_allocation_analysis <- combined_df %>%
filter(team %in% c(4)) %>%
group_by(team) %>%
summarize(
avg_no_of_workers = mean(no_of_workers, na.rm = TRUE),
total_tasks = n(),
avg_idle_time = mean(idle_time, na.rm = TRUE),
avg_overtime = mean(over_time, na.rm = TRUE),
avg_actual_productivity = mean(actual_productivity, na.rm = TRUE)
) %>%
arrange(desc(avg_overtime))
print(coordination_allocation_analysis)
## # A tibble: 1 × 6
## team avg_no_of_workers total_tasks avg_idle_time avg_overtime
## <int> <dbl> <int> <dbl> <dbl>
## 1 4 39.1 291 1.03 5565.
## # ℹ 1 more variable: avg_actual_productivity <dbl>
The results show that Thursday and Saturday have the highest average overtime, indicating potential issues with workload distribution or task dependencies on these days. Further investigation is needed to identify the causes, such as unplanned workload increases, task dependencies, staffing shortages, or delays in coordination with other departments. Understanding these factors will help develop strategies for better scheduling, task redistribution, and resource planning to reduce overtime.
# Analyze dependencies by grouping tasks and idle times
task_dependency_investigation <- combined_df %>%
group_by(team, department) %>%
summarize(
total_tasks = n(),
avg_idle_time = mean(idle_time, na.rm = TRUE),
avg_overtime = mean(over_time, na.rm = TRUE)
) %>%
arrange(desc(avg_idle_time))
## `summarise()` has grouped output by 'team'. You can override using the
## `.groups` argument.
print(task_dependency_investigation)
## # A tibble: 36 × 5
## # Groups: team [12]
## team department total_tasks avg_idle_time avg_overtime
## <int> <chr> <int> <dbl> <dbl>
## 1 7 "sweing" 129 8.62 6845.
## 2 8 "sweing" 160 2.11 6900.
## 3 4 "sweing" 171 1.75 7684.
## 4 5 "sweing" 140 0.732 7406.
## 5 10 "sweing" 121 0.397 6702.
## 6 2 "sweing" 124 0.210 6817.
## 7 11 "sweing" 154 0.156 5817.
## 8 1 "finishing" 56 0 1611.
## 9 1 "finishing " 57 0 1985.
## 10 1 "sweing" 135 0 7000.
## # ℹ 26 more rows
The analysis shows that the higher overtime for Team 4 is driven by two main factors:
Higher Number of Tasks: Team 4 in the sewing department is handling a higher number of tasks (171) compared to other teams. This heavier workload can lead to increased overtime to meet production targets.
Higher Idle Time: Despite having many tasks, Team 4 experiences higher idle time (1.75 hours). This suggests that workers are available but not always fully utilized, creating inefficiencies that further require overtime to complete tasks.
Addressing these two issues—optimizing task distribution and reducing idle time—could help
Monte Carlo simulations could be a useful tool to model the uncertainties in the factors contributing to overtime, such as task distribution, idle time, and productivity. By using Monte Carlo simulations, we can simulate different scenarios and assess the potential impact of various adjustments on Team 4’s overtime.
library(dplyr)
library(ggplot2)
mean_overtime <- mean(combined_df$over_time, na.rm = TRUE)
mean_idle_time <- mean(combined_df$idle_time, na.rm = TRUE)
mean_productivity <- mean(combined_df$actual_productivity, na.rm = TRUE)
sd_overtime <- sd(combined_df$over_time, na.rm = TRUE)
sd_idle_time <- sd(combined_df$idle_time, na.rm = TRUE)
sd_productivity <- sd(combined_df$actual_productivity, na.rm = TRUE)
print(mean_overtime)
## [1] 4540.161
print(mean_idle_time)
## [1] 0.6525084
print(mean_productivity)
## [1] 0.7311311
print(sd_overtime)
## [1] 3350.28
print(sd_idle_time)
## [1] 12.05551
print(sd_productivity)
## [1] 0.1769174
# Define the number of simulations
num_simulations <- 10000
set.seed(123)
simulated_overtime <- replicate(num_simulations,
{
overtime_sim <- rnorm(1, mean_overtime, sd_overtime)
idle_time_sim <- rnorm(1, mean_idle_time, sd_idle_time)
productivity_sim <- rnorm(1, mean_productivity, sd_productivity)
# Create a simple model: overtime depends on idle time and productivity
overtime_sim + (idle_time_sim * 0.5) - (productivity_sim * 0.2) # Modify weights as needed
})
summary(simulated_overtime)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8355 2200 4479 4516 6840 17683
# Plot the simulation results
ggplot(data.frame(simulated_overtime), aes(x = simulated_overtime)) +
geom_histogram(binwidth = 1000, fill = "skyblue", color = "black", alpha = 0.7) +
labs(title = "Monte Carlo Simulation of Overtime for Team 4",
x = "Simulated Overtime (Minutes)", y = "Frequency") +
theme_minimal()
Team 4 shows high overtime with an average of 4540.16 minutes, occasionally reaching up to 17,683 minutes, indicating periods of heavy workload. While the idle time is generally low (average of 0.65 minutes), there are occasional spikes (up to 2.99 minutes), suggesting periods of inefficiency. Addressing workload distribution and idle time could help reduce overtime and improve productivity.