A collection of 5 random samples of your data (with replacement).

We are simulating the act of collecting data .Each sub-sample should be as long as roughly 50% percent of your data. Store each sample set in a separate data frame (e.g., df_2 might be the second of these samples). Of course, these sub-samples should each include both categorical and continuous (numeric) data

data <- read.csv("C:/Users/rbada/Downloads/productivity+prediction+of+garment+employees/garments_worker_productivity.csv")
### Create 5 Random Sub-Samples (with Replacement)
set.seed(123)
num_samples <- 5  
sample_size <- round(0.5 * nrow(data))
sample_data_frames <- list()

for (i in 1:num_samples) {
  sample_df <- data[sample(1:nrow(data), size = sample_size, replace = TRUE), ]
  
  assign(paste0("df_", i), sample_df)
  sample_data_frames[[paste0("sample_", i)]] <- sample_df
}
for (i in 1:num_samples) {
  cat("Number of rows in df_", i, ": ", nrow(get(paste0("df_", i))), "\n")
  print(head(get(paste0("df_", i))))
}
## Number of rows in df_ 1 :  598 
##          date  quarter department       day team targeted_productivity   smv
## 415 1/24/2015 Quarter4     sweing  Saturday    8                  0.35 15.09
## 463 1/27/2015 Quarter4 finishing    Tuesday    3                  0.75  3.94
## 179 1/11/2015 Quarter2 finishing     Sunday   10                  0.80  3.94
## 526 1/31/2015 Quarter5 finishing   Saturday    9                  0.75  3.94
## 195 1/12/2015 Quarter2 finishing     Monday    4                  0.35  4.30
## 938 2/25/2015 Quarter4  finishing Wednesday    8                  0.70  4.60
##      wip over_time incentive idle_time idle_men no_of_style_change
## 415 1448      9360        23         0        0                  0
## 463   NA      1440         0         0        0                  0
## 179   NA      1440         0         0        0                  0
## 526   NA       240         0         0        0                  0
## 195   NA      3240         0         0        0                  0
## 938   NA       960         0         0        0                  0
##     no_of_workers actual_productivity
## 415            52           0.3499895
## 463            12           0.8618750
## 179             8           0.8282955
## 526             2           0.9718667
## 195            18           0.9422138
## 938             8           0.6585417
## Number of rows in df_ 2 :  598 
##           date  quarter department       day team targeted_productivity   smv
## 753  2/14/2015 Quarter2  finishing  Saturday   12                  0.80  4.08
## 398  1/24/2015 Quarter4     sweing  Saturday    4                  0.75 22.52
## 1179 3/11/2015 Quarter2     sweing Wednesday   12                  0.80 15.26
## 818  2/17/2015 Quarter3     sweing   Tuesday    9                  0.65 18.79
## 786  2/16/2015 Quarter3     sweing    Monday    1                  0.80 22.52
## 660   2/8/2015 Quarter2     sweing    Sunday    3                  0.80 22.52
##       wip over_time incentive idle_time idle_men no_of_style_change
## 753    NA      1080         0         0        0                  0
## 398   727     10260        94         0        0                  0
## 1179  470      4080        63         0        0                  0
## 818  2120      5520         0         0        0                  1
## 786  1422      6840       113         0        0                  0
## 660  1283      6720        88         0        0                  0
##      no_of_workers actual_productivity
## 753              9           0.8008889
## 398             57           0.9003211
## 1179            34           0.8004020
## 818             51           0.6501340
## 786             57           1.0002304
## 660             56           0.9001298
## Number of rows in df_ 3 :  598 
##          date  quarter department       day team targeted_productivity   smv
## 674  2/8/2015 Quarter2     sweing    Sunday    8                  0.70 24.26
## 719 2/11/2015 Quarter2     sweing Wednesday   10                  0.80 22.52
## 837 2/18/2015 Quarter3  finishing Wednesday    7                  0.70  5.13
## 183 1/11/2015 Quarter2     sweing    Sunday    2                  0.80 28.08
## 465 1/27/2015 Quarter4 finishing    Tuesday    8                  0.65  3.94
## 753 2/14/2015 Quarter2  finishing  Saturday   12                  0.80  4.08
##     wip over_time incentive idle_time idle_men no_of_style_change no_of_workers
## 674 154      6840         0         0        0                  0          57.0
## 719 598         0        75         0        0                  0          56.0
## 837  NA       960         0         0        0                  0           8.0
## 183 805     10530        63         0        0                  0          58.5
## 465  NA       960         0         0        0                  0           8.0
## 753  NA      1080         0         0        0                  0           9.0
##     actual_productivity
## 674           0.3532596
## 719           0.8503646
## 837           0.6718750
## 183           0.8000000
## 465           0.8454583
## 753           0.8008889
## Number of rows in df_ 4 :  598 
##          date  quarter department       day team targeted_productivity   smv
## 122  1/7/2015 Quarter1     sweing Wednesday    5                  0.70 21.98
## 471 1/27/2015 Quarter4     sweing   Tuesday    9                  0.70 29.12
## 215 1/12/2015 Quarter2     sweing    Monday    4                  0.35 22.40
## 489 1/28/2015 Quarter4     sweing Wednesday    9                  0.70 29.12
## 532 1/31/2015 Quarter5 finishing   Saturday    6                  0.60  3.94
## 522 1/31/2015 Quarter5 finishing   Saturday    3                  0.80  3.94
##      wip over_time incentive idle_time idle_men no_of_style_change
## 122  413      9720        40         0        0                  0
## 471 1294      6960        50         0        0                  0
## 215  581      7350         0         0        0                  0
## 489 1340      6960        63         0        0                  0
## 532   NA      1200         0         0        0                  0
## 522   NA       960         0         0        0                  0
##     no_of_workers actual_productivity
## 122          58.0           0.7004808
## 471          58.0           0.7003862
## 215          51.5           0.3506330
## 489          58.0           0.7505931
## 532          10.0           0.9718667
## 522           8.0           0.9718667
## Number of rows in df_ 5 :  598 
##           date  quarter department       day team targeted_productivity   smv
## 209  1/12/2015 Quarter2 finishing     Monday    8                   0.8  2.90
## 529  1/31/2015 Quarter5 finishing   Saturday    7                   0.7  3.94
## 483  1/28/2015 Quarter4     sweing Wednesday   10                   0.8 22.52
## 1130  3/9/2015 Quarter2  finishing    Monday   12                   0.8  4.60
## 1042  3/3/2015 Quarter1  finishing   Tuesday    1                   0.7  3.94
## 246  1/14/2015 Quarter2     sweing Wednesday    8                   0.8 25.90
##       wip over_time incentive idle_time idle_men no_of_style_change
## 209    NA      1440         0         0        0                  0
## 529    NA      1200         0         0        0                  0
## 483  1175      6720        60         0        0                  0
## 1130   NA         0      1080         0        0                  0
## 1042   NA      3360         0         0        0                  0
## 246  1218     10170        60         0        0                  0
##      no_of_workers actual_productivity
## 209            8.0           0.7250000
## 529           10.0           0.9718667
## 483           56.0           0.8505321
## 1130           9.0           0.9029630
## 1042           8.0           0.5554306
## 246           56.5           0.8501368

Summarize the Sub-Samples

for (i in 1:num_samples) {
  cat("\nSummary of df_", i, ":\n")
  print(summary(get(paste0("df_", i))))
}
## 
## Summary of df_ 1 :
##      date             quarter           department            day           
##  Length:598         Length:598         Length:598         Length:598        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       team        targeted_productivity      smv             wip       
##  Min.   : 1.000   Min.   :0.3500        Min.   : 2.90   Min.   :   11  
##  1st Qu.: 3.000   1st Qu.:0.7000        1st Qu.: 3.94   1st Qu.:  783  
##  Median : 6.000   Median :0.7500        Median :15.26   Median : 1054  
##  Mean   : 6.283   Mean   :0.7242        Mean   :15.15   Mean   : 1196  
##  3rd Qu.: 9.000   3rd Qu.:0.8000        3rd Qu.:24.26   3rd Qu.: 1274  
##  Max.   :12.000   Max.   :0.8000        Max.   :51.02   Max.   :21540  
##                                                         NA's   :243    
##    over_time       incentive         idle_time           idle_men      
##  Min.   :    0   Min.   :   0.00   Min.   :  0.0000   Min.   : 0.0000  
##  1st Qu.: 1440   1st Qu.:   0.00   1st Qu.:  0.0000   1st Qu.: 0.0000  
##  Median : 4080   Median :  23.00   Median :  0.0000   Median : 0.0000  
##  Mean   : 4609   Mean   :  43.33   Mean   :  0.5627   Mean   : 0.3428  
##  3rd Qu.: 6960   3rd Qu.:  50.00   3rd Qu.:  0.0000   3rd Qu.: 0.0000  
##  Max.   :15120   Max.   :3600.00   Max.   :150.0000   Max.   :35.0000  
##                                                                        
##  no_of_style_change no_of_workers   actual_productivity
##  Min.   :0.0000     Min.   : 2.00   Min.   :0.2473     
##  1st Qu.:0.0000     1st Qu.:10.00   1st Qu.:0.6503     
##  Median :0.0000     Median :45.00   Median :0.7842     
##  Mean   :0.1421     Mean   :35.68   Mean   :0.7402     
##  3rd Qu.:0.0000     3rd Qu.:57.00   3rd Qu.:0.8506     
##  Max.   :2.0000     Max.   :60.00   Max.   :1.1204     
##                                                        
## 
## Summary of df_ 2 :
##      date             quarter           department            day           
##  Length:598         Length:598         Length:598         Length:598        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       team        targeted_productivity      smv             wip        
##  Min.   : 1.000   Min.   :0.3500        Min.   : 2.90   Min.   :   7.0  
##  1st Qu.: 3.000   1st Qu.:0.7000        1st Qu.: 3.94   1st Qu.: 723.5  
##  Median : 6.000   Median :0.7500        Median :15.26   Median : 980.0  
##  Mean   : 6.319   Mean   :0.7355        Mean   :15.21   Mean   : 965.3  
##  3rd Qu.: 9.000   3rd Qu.:0.8000        3rd Qu.:24.26   3rd Qu.:1222.5  
##  Max.   :12.000   Max.   :0.8000        Max.   :51.02   Max.   :2120.0  
##                                                         NA's   :247     
##    over_time       incentive         idle_time           idle_men      
##  Min.   :    0   Min.   :   0.00   Min.   :  0.0000   Min.   : 0.0000  
##  1st Qu.: 1440   1st Qu.:   0.00   1st Qu.:  0.0000   1st Qu.: 0.0000  
##  Median : 4080   Median :  23.00   Median :  0.0000   Median : 0.0000  
##  Mean   : 4560   Mean   :  34.14   Mean   :  0.9281   Mean   : 0.2759  
##  3rd Qu.: 6960   3rd Qu.:  50.00   3rd Qu.:  0.0000   3rd Qu.: 0.0000  
##  Max.   :15000   Max.   :1200.00   Max.   :270.0000   Max.   :45.0000  
##                                                                        
##  no_of_style_change no_of_workers   actual_productivity
##  Min.   :0.0000     Min.   : 2.00   Min.   :0.2337     
##  1st Qu.:0.0000     1st Qu.: 8.00   1st Qu.:0.6502     
##  Median :0.0000     Median :34.00   Median :0.7667     
##  Mean   :0.1304     Mean   :34.64   Mean   :0.7326     
##  3rd Qu.:0.0000     3rd Qu.:57.00   3rd Qu.:0.8343     
##  Max.   :2.0000     Max.   :60.00   Max.   :1.1204     
##                                                        
## 
## Summary of df_ 3 :
##      date             quarter           department            day           
##  Length:598         Length:598         Length:598         Length:598        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       team        targeted_productivity      smv             wip       
##  Min.   : 1.000   Min.   :0.0700        Min.   : 2.90   Min.   :    7  
##  1st Qu.: 4.000   1st Qu.:0.7000        1st Qu.: 3.94   1st Qu.:  730  
##  Median : 6.000   Median :0.7500        Median :12.52   Median :  983  
##  Mean   : 6.368   Mean   :0.7238        Mean   :14.90   Mean   : 1043  
##  3rd Qu.: 9.000   3rd Qu.:0.8000        3rd Qu.:24.26   3rd Qu.: 1194  
##  Max.   :12.000   Max.   :0.8000        Max.   :50.89   Max.   :12261  
##                                                         NA's   :263    
##    over_time       incentive        idle_time          idle_men      
##  Min.   :    0   Min.   :  0.00   Min.   :  0.000   Min.   : 0.0000  
##  1st Qu.: 1440   1st Qu.:  0.00   1st Qu.:  0.000   1st Qu.: 0.0000  
##  Median : 3840   Median :  0.00   Median :  0.000   Median : 0.0000  
##  Mean   : 4469   Mean   : 28.74   Mean   :  1.676   Mean   : 0.8478  
##  3rd Qu.: 6840   3rd Qu.: 50.00   3rd Qu.:  0.000   3rd Qu.: 0.0000  
##  Max.   :25920   Max.   :960.00   Max.   :300.000   Max.   :45.0000  
##                                                                      
##  no_of_style_change no_of_workers   actual_productivity
##  Min.   :0.0000     Min.   : 2.00   Min.   :0.2358     
##  1st Qu.:0.0000     1st Qu.: 8.00   1st Qu.:0.6316     
##  Median :0.0000     Median :34.00   Median :0.7552     
##  Mean   :0.1555     Mean   :33.93   Mean   :0.7239     
##  3rd Qu.:0.0000     3rd Qu.:57.00   3rd Qu.:0.8501     
##  Max.   :2.0000     Max.   :60.00   Max.   :1.1005     
##                                                        
## 
## Summary of df_ 4 :
##      date             quarter           department            day           
##  Length:598         Length:598         Length:598         Length:598        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       team        targeted_productivity      smv             wip         
##  Min.   : 1.000   Min.   :0.0700        Min.   : 2.90   Min.   :    7.0  
##  1st Qu.: 4.000   1st Qu.:0.7000        1st Qu.: 3.94   1st Qu.:  810.5  
##  Median : 7.000   Median :0.7500        Median :14.89   Median : 1035.0  
##  Mean   : 6.674   Mean   :0.7286        Mean   :14.55   Mean   : 1134.1  
##  3rd Qu.: 9.750   3rd Qu.:0.8000        3rd Qu.:22.94   3rd Qu.: 1216.5  
##  Max.   :12.000   Max.   :0.8000        Max.   :54.56   Max.   :23122.0  
##                                                         NA's   :267      
##    over_time       incentive         idle_time          idle_men      
##  Min.   :    0   Min.   :   0.00   Min.   :0.00000   Min.   : 0.0000  
##  1st Qu.: 1440   1st Qu.:   0.00   1st Qu.:0.00000   1st Qu.: 0.0000  
##  Median : 3960   Median :   0.00   Median :0.00000   Median : 0.0000  
##  Mean   : 4594   Mean   :  39.62   Mean   :0.04599   Mean   : 0.2508  
##  3rd Qu.: 6960   3rd Qu.:  50.00   3rd Qu.:0.00000   3rd Qu.: 0.0000  
##  Max.   :25920   Max.   :3600.00   Max.   :8.00000   Max.   :40.0000  
##                                                                       
##  no_of_style_change no_of_workers   actual_productivity
##  Min.   :0.0000     Min.   : 2.00   Min.   :0.2337     
##  1st Qu.:0.0000     1st Qu.: 9.00   1st Qu.:0.6517     
##  Median :0.0000     Median :34.00   Median :0.7592     
##  Mean   :0.1321     Mean   :33.47   Mean   :0.7366     
##  3rd Qu.:0.0000     3rd Qu.:57.00   3rd Qu.:0.8501     
##  Max.   :2.0000     Max.   :60.00   Max.   :1.0507     
##                                                        
## 
## Summary of df_ 5 :
##      date             quarter           department            day           
##  Length:598         Length:598         Length:598         Length:598        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       team        targeted_productivity      smv             wip         
##  Min.   : 1.000   Min.   :0.0700        Min.   : 2.90   Min.   :    7.0  
##  1st Qu.: 4.000   1st Qu.:0.7000        1st Qu.: 4.15   1st Qu.:  832.5  
##  Median : 7.000   Median :0.7500        Median :15.26   Median : 1054.0  
##  Mean   : 6.761   Mean   :0.7266        Mean   :15.52   Mean   : 1047.5  
##  3rd Qu.:10.000   3rd Qu.:0.8000        3rd Qu.:25.90   3rd Qu.: 1239.8  
##  Max.   :12.000   Max.   :0.8000        Max.   :54.56   Max.   :12261.0  
##                                                         NA's   :240      
##    over_time       incentive         idle_time          idle_men      
##  Min.   :    0   Min.   :   0.00   Min.   :0.00000   Min.   : 0.0000  
##  1st Qu.: 1440   1st Qu.:   0.00   1st Qu.:0.00000   1st Qu.: 0.0000  
##  Median : 4080   Median :  23.00   Median :0.00000   Median : 0.0000  
##  Mean   : 4469   Mean   :  37.58   Mean   :0.05017   Mean   : 0.2926  
##  3rd Qu.: 6840   3rd Qu.:  50.00   3rd Qu.:0.00000   3rd Qu.: 0.0000  
##  Max.   :15120   Max.   :2880.00   Max.   :8.00000   Max.   :40.0000  
##                                                                       
##  no_of_style_change no_of_workers   actual_productivity
##  Min.   :0.000      Min.   : 2.00   Min.   :0.2358     
##  1st Qu.:0.000      1st Qu.: 9.00   1st Qu.:0.6286     
##  Median :0.000      Median :34.00   Median :0.7506     
##  Mean   :0.194      Mean   :35.15   Mean   :0.7222     
##  3rd Qu.:0.000      3rd Qu.:57.00   3rd Qu.:0.8502     
##  Max.   :2.000      Max.   :60.00   Max.   :1.1005     
## 

The results show that productivity is stable, with most workers meeting expected performance levels. The average productivity across the sub-samples less than 1, with no major issues. However, there are some differences that need further investigation:

1-Idle time is higher in df_2 compared to the other samples, which could mean that some teams or departments experienced delays or downtime during production.

2-Overtime changes slightly between samples.This might be due to some teams working longer hours because of different workloads or delays.

3-Work-in-progress (WIP) is generally steady, but large increase in some samples suggest possible backlogs or delays in production.

We need to check which departments or teams are causing the differences in overtime and idle time. The next step is to group the data by department and calculate Z-scores to identify any anomalies.

###Scrutinize these sub-samples. Note: you might find group_by quite helpful here

Group by Departments and Summarize Metrics

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Group by department and summarize metrics for all sub-samples
for (i in 1:num_samples) {
  cat("\nGrouped summary for df_", i, " (by department):\n")
  
  group_summary <- get(paste0("df_", i)) %>%
    group_by(department) %>%
    summarize(
      mean_productivity = mean(actual_productivity, na.rm = TRUE),
      median_productivity = median(actual_productivity, na.rm = TRUE),
      mean_overtime = mean(over_time, na.rm = TRUE),
      median_overtime = median(over_time, na.rm = TRUE),
      mean_idle_time = mean(idle_time, na.rm = TRUE),
      median_idle_time = median(idle_time, na.rm = TRUE)
    )
  
  print(group_summary)
}
## 
## Grouped summary for df_ 1  (by department):
## # A tibble: 3 × 7
##   department mean_productivity median_productivity mean_overtime median_overtime
##   <chr>                  <dbl>               <dbl>         <dbl>           <dbl>
## 1 "finishin…             0.719               0.755         1727.            1080
## 2 "finishin…             0.802               0.865         2060             1440
## 3 "sweing"               0.728               0.751         6475.            6840
## # ℹ 2 more variables: mean_idle_time <dbl>, median_idle_time <dbl>
## 
## Grouped summary for df_ 2  (by department):
## # A tibble: 3 × 7
##   department mean_productivity median_productivity mean_overtime median_overtime
##   <chr>                  <dbl>               <dbl>         <dbl>           <dbl>
## 1 "finishin…             0.700               0.721         1626.            1080
## 2 "finishin…             0.766               0.821         2114.            1440
## 3 "sweing"               0.732               0.751         6450.            6840
## # ℹ 2 more variables: mean_idle_time <dbl>, median_idle_time <dbl>
## 
## Grouped summary for df_ 3  (by department):
## # A tibble: 3 × 7
##   department mean_productivity median_productivity mean_overtime median_overtime
##   <chr>                  <dbl>               <dbl>         <dbl>           <dbl>
## 1 "finishin…             0.740               0.772         1734.            1080
## 2 "finishin…             0.775               0.821         1873.            1440
## 3 "sweing"               0.698               0.750         6564.            6840
## # ℹ 2 more variables: mean_idle_time <dbl>, median_idle_time <dbl>
## 
## Grouped summary for df_ 4  (by department):
## # A tibble: 3 × 7
##   department mean_productivity median_productivity mean_overtime median_overtime
##   <chr>                  <dbl>               <dbl>         <dbl>           <dbl>
## 1 "finishin…             0.699               0.727         1955.            1080
## 2 "finishin…             0.811               0.828         1918.            1440
## 3 "sweing"               0.725               0.751         6737.            6840
## # ℹ 2 more variables: mean_idle_time <dbl>, median_idle_time <dbl>
## 
## Grouped summary for df_ 5  (by department):
## # A tibble: 3 × 7
##   department mean_productivity median_productivity mean_overtime median_overtime
##   <chr>                  <dbl>               <dbl>         <dbl>           <dbl>
## 1 "finishin…             0.702               0.749         1833.            1200
## 2 "finishin…             0.771               0.821         1706.            1440
## 3 "sweing"               0.715               0.750         6274.            6840
## # ℹ 2 more variables: mean_idle_time <dbl>, median_idle_time <dbl>

The grouped summaries show that productivity, overtime, and idle time behave differently across departments:

1-The productivity across the sub-samples is quite consistent, with df_1 having the highest average productivity at 0.7402 and df_3 having the lowest at 0.7239. The differences in productivity between these sub-samples are relatively small (ranging from 0.7239 to 0.7402), which suggests that the overall performance of the team or department remains stable across the sub-samples.

1-Overtime: The sewing department consistently works longer overtime (between 0 to and 25920 minutes) compared to the finishing department (around 0 to 10500 minutes). This large difference could mean that sewing teams are struggling with heavier workloads or delays.

3-In all sub-samples, the sewing department shows idle time increase ranging from 0 to 300 minutes, indicating inefficiencies. To improve, they should focus on better workflow management and task distribution. The finishing department, on the other hand, has no idle time, indicating smooth and efficient production. ### How Different Are They?

The sub-samples show some differences, especially when comparing productivity, overtime, and idle time across departments:

1-Productivity: There are small differences across sub-samples, but productivity is generally stable, with finishing having higher productivity compared to sewing.

2-Overtime: The sewing department consistently has much higher overtime (about 6,474 to 6,840 minutes) compared to finishing .However, the sewing department experienced a temporary increase in idle time (2.99 minutes in df_3), compared to much lower idle times in the other sub-samples

3-Idle Time:The finishing department shows almost no idle time in any sub-sample, indicating smooth production However, the sewing department experiences occasional increase, with df_3 showing an idle time of 2.99 minutes—an anomaly compared to other sub-samples where idle time is much lower.

What Would You Have Called an Anomaly in One Sub-Sample That You Wouldn’t in Another?

1-Idle Time in Sewing (df_3): In df_3, the sewing department has an unusually high mean idle time of 2.99 minutes, compared to close to 0 in other sub-samples. This would be considered an anomaly in df_3 but not in sub-samples where idle time is consistently low.

2-Overtime The finishing department’s overtime stays consistently low across all sub-samples, showing well-managed workloads. However, the sewing department consistently has high overtime (between 6,564 and 6840 minutes), which could be due to extra workloads or delays in production that need further investigation.

Are there aspects of the data that are consistent among all sub-samples?

1-High Finishing Productivity: The finishing department consistently shows high productivity across all sub-samples, indicating steady performance. Sewing

2-Overtime: The sewing department consistently works longer overtime in all sub-samples, suggesting a systemic workload or process-related issue.

Consider how this investigation affects how you might draw conclusions about the data in the future.

Identify Consistent Patterns: The consistent high productivity of the finishing department suggests it is a reliable and efficient area of production.

Systemic Issues:The consistent overtime and occasional increases in idle time in the sewing department indicate possible delays or inefficiencies in the production process that require further investigation.

Random vs. Systemic Variations:While some variations, such as the high idle time in df_3, may be temporary, consistently high overtime across sub-samples points to a systemic workload issue that should be addressed.

Compare Sub-Sample Statistics to the Overall Summary to spot anomalies and deviation

# Combine all sub-samples into one data frame
combined_df <- do.call(rbind, lapply(1:num_samples, function(i) get(paste0("df_", i))))
str(combined_df)
## 'data.frame':    2990 obs. of  15 variables:
##  $ date                 : chr  "1/24/2015" "1/27/2015" "1/11/2015" "1/31/2015" ...
##  $ quarter              : chr  "Quarter4" "Quarter4" "Quarter2" "Quarter5" ...
##  $ department           : chr  "sweing" "finishing " "finishing " "finishing " ...
##  $ day                  : chr  "Saturday" "Tuesday" "Sunday" "Saturday" ...
##  $ team                 : int  8 3 10 9 4 8 2 5 12 10 ...
##  $ targeted_productivity: num  0.35 0.75 0.8 0.75 0.35 0.7 0.7 0.7 0.8 0.8 ...
##  $ smv                  : num  15.09 3.94 3.94 3.94 4.3 ...
##  $ wip                  : int  1448 NA NA NA NA NA 817 573 1026 1108 ...
##  $ over_time            : int  9360 1440 1440 240 3240 960 5520 6840 2880 6720 ...
##  $ incentive            : int  23 0 0 0 0 0 30 30 63 113 ...
##  $ idle_time            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ idle_men             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ no_of_style_change   : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ no_of_workers        : num  52 12 8 2 18 8 45 57 34 56 ...
##  $ actual_productivity  : num  0.35 0.862 0.828 0.972 0.942 ...
# Calculate overall means and standard deviations

overall_mean_productivity <- mean(combined_df$actual_productivity, na.rm = TRUE)
overall_sd_productivity <- sd(combined_df$actual_productivity, na.rm = TRUE)

overall_mean_overtime <- mean(combined_df$over_time, na.rm = TRUE)
overall_sd_overtime <- sd(combined_df$over_time, na.rm = TRUE)

overall_mean_idle_time <- mean(combined_df$idle_time, na.rm = TRUE)
overall_sd_idle_time <- sd(combined_df$idle_time, na.rm = TRUE)

cat("\nOverall Summary Statistics:\n")
## 
## Overall Summary Statistics:
cat("Mean Productivity: ", overall_mean_productivity, "\n")
## Mean Productivity:  0.7311311
cat("SD Productivity: ", overall_sd_productivity, "\n\n")
## SD Productivity:  0.1769174
cat("Mean Overtime: ", overall_mean_overtime, "\n")
## Mean Overtime:  4540.161
cat("SD Overtime: ", overall_sd_overtime, "\n\n")
## SD Overtime:  3350.28
cat("Mean Idle Time: ", overall_mean_idle_time, "\n")
## Mean Idle Time:  0.6525084
cat("SD Idle Time: ", overall_sd_idle_time, "\n")
## SD Idle Time:  12.05551
for (i in 1:num_samples) {
  cat("\nZ-Scores for df_", i, ":\n")
  
  sub_sample <- get(paste0("df_", i))
  
  z_scores <- sub_sample %>%
    group_by(department) %>%
    summarize(
      z_productivity = (mean(actual_productivity, na.rm = TRUE) - overall_mean_productivity) / overall_sd_productivity,
      z_overtime = (mean(over_time, na.rm = TRUE) - overall_mean_overtime) / overall_sd_overtime,
      z_idle_time = (mean(idle_time, na.rm = TRUE) - overall_mean_idle_time) / overall_sd_idle_time
    )
  
  print(z_scores)
}
## 
## Z-Scores for df_ 1 :
## # A tibble: 3 × 4
##   department   z_productivity z_overtime z_idle_time
##   <chr>                 <dbl>      <dbl>       <dbl>
## 1 "finishing"         -0.0709     -0.840     -0.0541
## 2 "finishing "         0.399      -0.740     -0.0541
## 3 "sweing"            -0.0156      0.577      0.0245
## 
## Z-Scores for df_ 2 :
## # A tibble: 3 × 4
##   department   z_productivity z_overtime z_idle_time
##   <chr>                 <dbl>      <dbl>       <dbl>
## 1 "finishing"        -0.177       -0.870     -0.0541
## 2 "finishing "        0.195       -0.724     -0.0541
## 3 "sweing"            0.00663      0.570      0.0770
## 
## Z-Scores for df_ 3 :
## # A tibble: 3 × 4
##   department   z_productivity z_overtime z_idle_time
##   <chr>                 <dbl>      <dbl>       <dbl>
## 1 "finishing"          0.0501     -0.838     -0.0541
## 2 "finishing "         0.246      -0.796     -0.0541
## 3 "sweing"            -0.186       0.604      0.194 
## 
## Z-Scores for df_ 4 :
## # A tibble: 3 × 4
##   department   z_productivity z_overtime z_idle_time
##   <chr>                 <dbl>      <dbl>       <dbl>
## 1 "finishing"         -0.183      -0.772     -0.0541
## 2 "finishing "         0.449      -0.783     -0.0541
## 3 "sweing"            -0.0329      0.656     -0.0472
## 
## Z-Scores for df_ 5 :
## # A tibble: 3 × 4
##   department   z_productivity z_overtime z_idle_time
##   <chr>                 <dbl>      <dbl>       <dbl>
## 1 "finishing"         -0.167      -0.808     -0.0541
## 2 "finishing "         0.228      -0.846     -0.0541
## 3 "sweing"            -0.0898      0.518     -0.0472

After analyzing the summary statistics for each sub-sample (df_1 to df_5) and the overall data set, we calculated Z-scores to compare the performance of departments and detect any anomalies. The Z-scores highlight any values that deviate significantly from the overall mean, particularly for productivity, overtime, and idle time.

The results show that productivity in the finishing department only has minor variations across sub-samples, with no significant anomalies. The sewing department’s productivity remains stable and close to the overall mean, showing consistent performance.

However, overtime stands out as an area of concern. The sewing department consistently shows positive Z-scores between 0.5 and 0.6, meaning it regularly experiences higher-than-average overtime. This indicates a potential systemic issue, possibly due to workload or resource management challenges.

Idle time does not show any major concerns, as its Z-scores stay close to 0, suggesting no unexpected delays or downtime.

In conclusion, while productivity and idle time are well-managed, the consistently high overtime in the sewing department should be further investigated to understand its cause and implement solutions to improve overall efficiency.

identifying key factors that could be causing the high overtime in the sewing department by grouping the data and exploring relevant metrics like day, team, WIP, and quarter.

for (i in 1:num_samples) {
  cat("\nAnalysis of Overtime Without Filtering (df_", i, "):\n")
  
  overtime_analysis <- get(paste0("df_", i)) %>%
    group_by(team, quarter, day) %>%
    summarize(
      mean_overtime = mean(over_time, na.rm = TRUE),
      mean_productivity = mean(actual_productivity, na.rm = TRUE),
      mean_idle_time = mean(idle_time, na.rm = TRUE)
    )
  
  print(overtime_analysis)
}
## 
## Analysis of Overtime Without Filtering (df_ 1 ):
## `summarise()` has grouped output by 'team', 'quarter'. You can override using
## the `.groups` argument.
## # A tibble: 257 × 6
## # Groups:   team, quarter [59]
##     team quarter  day       mean_overtime mean_productivity mean_idle_time
##    <int> <chr>    <chr>             <dbl>             <dbl>          <dbl>
##  1     1 Quarter1 Monday            4680              0.652              0
##  2     1 Quarter1 Saturday          1040              0.892              0
##  3     1 Quarter1 Sunday            4950              0.850              0
##  4     1 Quarter1 Thursday          6154.             0.800              0
##  5     1 Quarter1 Tuesday           6960              0.801              0
##  6     1 Quarter1 Wednesday         4170              0.926              0
##  7     1 Quarter2 Monday            4110              0.823              0
##  8     1 Quarter2 Saturday          6030              0.798              0
##  9     1 Quarter2 Sunday            6960              0.850              0
## 10     1 Quarter2 Thursday          1440              0.948              0
## # ℹ 247 more rows
## 
## Analysis of Overtime Without Filtering (df_ 2 ):
## `summarise()` has grouped output by 'team', 'quarter'. You can override using
## the `.groups` argument.
## # A tibble: 254 × 6
## # Groups:   team, quarter [56]
##     team quarter  day       mean_overtime mean_productivity mean_idle_time
##    <int> <chr>    <chr>             <dbl>             <dbl>          <dbl>
##  1     1 Quarter1 Monday             5480             0.702              0
##  2     1 Quarter1 Saturday           1080             0.767              0
##  3     1 Quarter1 Sunday             3210             0.725              0
##  4     1 Quarter1 Thursday            960             0.837              0
##  5     1 Quarter1 Tuesday            5050             0.833              0
##  6     1 Quarter1 Wednesday          4536             0.833              0
##  7     1 Quarter2 Monday             2520             0.928              0
##  8     1 Quarter2 Saturday           7272             0.879              0
##  9     1 Quarter2 Sunday             3060             0.888              0
## 10     1 Quarter2 Tuesday             480             0.891              0
## # ℹ 244 more rows
## 
## Analysis of Overtime Without Filtering (df_ 3 ):
## `summarise()` has grouped output by 'team', 'quarter'. You can override using
## the `.groups` argument.
## # A tibble: 249 × 6
## # Groups:   team, quarter [58]
##     team quarter  day      mean_overtime mean_productivity mean_idle_time
##    <int> <chr>    <chr>            <dbl>             <dbl>          <dbl>
##  1     1 Quarter1 Monday            7080             0.800              0
##  2     1 Quarter1 Saturday          3750             0.758              0
##  3     1 Quarter1 Sunday            4950             0.850              0
##  4     1 Quarter1 Thursday          4020             0.883              0
##  5     1 Quarter1 Tuesday           8160             0.884              0
##  6     1 Quarter2 Monday            1296             0.880              0
##  7     1 Quarter2 Saturday         10620             0.800              0
##  8     1 Quarter2 Sunday            7245             0.873              0
##  9     1 Quarter2 Thursday           480             1.05               0
## 10     1 Quarter2 Tuesday            960             0.881              0
## # ℹ 239 more rows
## 
## Analysis of Overtime Without Filtering (df_ 4 ):
## `summarise()` has grouped output by 'team', 'quarter'. You can override using
## the `.groups` argument.
## # A tibble: 266 × 6
## # Groups:   team, quarter [60]
##     team quarter  day       mean_overtime mean_productivity mean_idle_time
##    <int> <chr>    <chr>             <dbl>             <dbl>          <dbl>
##  1     1 Quarter1 Saturday           5550             0.804              0
##  2     1 Quarter1 Thursday           4170             0.923              0
##  3     1 Quarter1 Tuesday            5130             0.778              0
##  4     1 Quarter1 Wednesday         10620             0.851              0
##  5     1 Quarter2 Monday              720             0.875              0
##  6     1 Quarter2 Saturday           8352             0.920              0
##  7     1 Quarter2 Sunday             1080             0.892              0
##  8     1 Quarter2 Thursday           6030             0.758              0
##  9     1 Quarter2 Tuesday            3160             0.911              0
## 10     1 Quarter2 Wednesday          6030             0.846              0
## # ℹ 256 more rows
## 
## Analysis of Overtime Without Filtering (df_ 5 ):
## `summarise()` has grouped output by 'team', 'quarter'. You can override using
## the `.groups` argument.
## # A tibble: 263 × 6
## # Groups:   team, quarter [60]
##     team quarter  day       mean_overtime mean_productivity mean_idle_time
##    <int> <chr>    <chr>             <dbl>             <dbl>          <dbl>
##  1     1 Quarter1 Monday             2820             0.521              0
##  2     1 Quarter1 Saturday           1200             0.870              0
##  3     1 Quarter1 Sunday             2124             0.846              0
##  4     1 Quarter1 Thursday           3930             0.782              0
##  5     1 Quarter1 Tuesday            3360             0.555              0
##  6     1 Quarter1 Wednesday          3930             0.854              0
##  7     1 Quarter2 Monday             5150             0.886              0
##  8     1 Quarter2 Sunday              960             0.891              0
##  9     1 Quarter2 Thursday              0             1.10               0
## 10     1 Quarter2 Tuesday            4020             0.868              0
## # ℹ 253 more rows

This analysis helps identify patterns or trends in overtime across different teams, days, or quarters, and can guide decisions on improving productivity or managing workloads. Further investigation may be needed to understand why certain teams or days show higher overtime and how to optimize resources better.

visualization spot for investigation

library(ggplot2)

# Bar plot of mean overtime by team
ggplot(overtime_analysis, aes(x = as.factor(team), y = mean_overtime)) +
  geom_bar(stat = "identity", fill = "lightblue") +
  labs(title = "Mean Overtime by Team", x = "Team", y = "Mean Overtime") +
  theme_minimal()

The visualization shows that Team 4 has the highest average overtime compared to other teams, suggesting they may be experiencing a heavier workload or inefficiencies. Further investigation is needed to understand the causes and optimize their operations, possibly focusing on task dependencies, staffing, or scheduling improvements.

# Bar plot of mean overtime by quarter
ggplot(overtime_analysis, aes(x = quarter, y = mean_overtime)) +
  geom_bar(stat = "identity", fill = "dark blue") +
  labs(title = "Mean Overtime by Quarter", x = "Quarter", y = "Mean Overtime") +
  theme_minimal()

The bar plot shows the mean overtime across different quarters. It highlights that Quarter 2 and Quarter 1 have the highest overtime, while Quarter 5 shows a significant drop in overtime hours. This suggests that overtime is most prevalent in the first half of the year, with a noticeable decrease in the last quarter. Further analysis is needed to explore the reasons behind these fluctuations in overtime across quarters.

# Bar plot of mean overtime by day
ggplot(overtime_analysis, aes(x = day, y = mean_overtime)) +
  geom_bar(stat = "identity", fill = "lightcoral") +
  labs(title = "Mean Overtime by Day", x = "Day", y = "Mean Overtime") +
  theme_minimal()

The bar plot indicates that Saturday and Thursday have the highest mean overtime, which suggests that these days experience more workload or operational challenges. Focusing on these two days for investigation can help uncover the factors driving overtime. By analyzing team coordination, task dependencies, and resource allocation, strategies can be developed to manage workloads more effectively and reduce overtime on these high-overtime days.

Investigate Team Workloads

# Investigate tasks and workload for high-overtime teams (4)
team_task_investigation <- combined_df %>%
  filter(team %in% c(4)) %>%
  group_by(team) %>%
  summarize(
    avg_overtime = mean(over_time, na.rm = TRUE),
    avg_idle_time = mean(idle_time, na.rm = TRUE),
    avg_productivity = mean(actual_productivity, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_overtime))

print(team_task_investigation)
## # A tibble: 1 × 4
##    team avg_overtime avg_idle_time avg_productivity
##   <int>        <dbl>         <dbl>            <dbl>
## 1     4        5565.          1.03            0.780

Team 4 has significant overtime (5564.948) with moderate idle time (1.03) and similar productivity (0.7805). Despite the overtime, productivity remains consistent, suggesting potential issues with task distribution, resource allocation, or team coordination. Further investigation is needed to understand the cause of the overtime and explore solutions such as optimizing task distribution and improving scheduling to reduce overtime and increase efficiency.

overtime_summary <- combined_df %>%
  filter(team %in% c(4)) %>%
  group_by(team, day) %>%
  summarize(
    avg_overtime = mean(over_time, na.rm = TRUE),
    avg_idle_time = mean(idle_time, na.rm = TRUE),
    avg_productivity = mean(actual_productivity, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_overtime))
## `summarise()` has grouped output by 'team'. You can override using the
## `.groups` argument.
print(overtime_summary)
## # A tibble: 6 × 5
## # Groups:   team [1]
##    team day       avg_overtime avg_idle_time avg_productivity
##   <int> <chr>            <dbl>         <dbl>            <dbl>
## 1     4 Saturday         6992.             0            0.801
## 2     4 Thursday         6720              0            0.764
## 3     4 Sunday           5948.             0            0.780
## 4     4 Monday           5164.             0            0.781
## 5     4 Tuesday          4360.             0            0.758
## 6     4 Wednesday        4268.             6            0.797

based on the investigation, it was found that Saturday and Thursday are the days with the highest overtime for Team 4, aligning with the previously identified trend for high overtime across teams. This further confirms that certain days of the week, like Thursday and Saturday, are more demanding for Team 4, contributing to the high overtime.

This investigation suggests that further analysis of task distribution, scheduling, and resource allocation on these specific days is necessary to understand the causes of high overtime and to identify strategies to reduce it while maintaining productivity.

workload_investigation <- combined_df %>%
filter(team %in% c(4)) %>%  
  group_by(team, department) %>%  
  summarize(
    avg_overtime = mean(over_time, na.rm = TRUE),  
    avg_idle_time = mean(idle_time, na.rm = TRUE),  
    avg_productivity = mean(actual_productivity, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_overtime))  
## `summarise()` has grouped output by 'team'. You can override using the
## `.groups` argument.
print(workload_investigation)
## # A tibble: 3 × 5
## # Groups:   team [1]
##    team department   avg_overtime avg_idle_time avg_productivity
##   <int> <chr>               <dbl>         <dbl>            <dbl>
## 1     4 "sweing"            7684.          1.75            0.737
## 2     4 "finishing "        3570.          0               0.892
## 3     4 "finishing"         1335.          0               0.785

Team 4’s sewing department shows the highest average overtime (7,683.51) and some idle time (1.75), suggesting possible workload imbalances or inefficiencies. The finishing department has lower overtime and no idle time, with slightly higher productivity (0.7846 vs. 0.7376). Further analysis is needed to explore the causes of high overtime and idle time in the sewing department, such as task distribution and workflow inefficiencies. Addressing these could improve overall productivity and reduce overtime.

idle_time_investigation <- combined_df %>%
  filter(team == 4, department == "sweing") %>%  
  group_by(team, day) %>%
  summarize(
    avg_idle_time = mean(idle_time, na.rm = TRUE),
    avg_overtime = mean(over_time, na.rm = TRUE),
    avg_productivity = mean(actual_productivity, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_idle_time))  
## `summarise()` has grouped output by 'team'. You can override using the
## `.groups` argument.
print(idle_time_investigation)
## # A tibble: 6 × 5
## # Groups:   team [1]
##    team day       avg_idle_time avg_overtime avg_productivity
##   <int> <chr>             <dbl>        <dbl>            <dbl>
## 1     4 Wednesday          10.3        5823.            0.756
## 2     4 Monday              0          7312.            0.727
## 3     4 Saturday            0          9787.            0.750
## 4     4 Sunday              0          7559.            0.723
## 5     4 Thursday            0          8287.            0.719
## 6     4 Tuesday             0          7281.            0.755

Wednesday has the highest idle time for Team 4, but it doesn’t directly correspond to the highest overtime because overtime is influenced by workload and task distribution. On Thursday and Saturday, the team likely had more demanding tasks, leading to higher overtime. Wednesday had idle time, but it didn’t require extra work hours, which shows that overtime isn’t always linked to idle time.

library(dplyr)

daily_workload_investigation <- combined_df %>%
  filter(team %in% c(4)) %>%  
  group_by(day) %>%
  summarize(
    avg_overtime = mean(over_time, na.rm = TRUE),
    avg_idle_time = mean(idle_time, na.rm = TRUE),
    avg_productivity = mean(actual_productivity, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_overtime))  

print(daily_workload_investigation)
## # A tibble: 6 × 4
##   day       avg_overtime avg_idle_time avg_productivity
##   <chr>            <dbl>         <dbl>            <dbl>
## 1 Saturday         6992.             0            0.801
## 2 Thursday         6720              0            0.764
## 3 Sunday           5948.             0            0.780
## 4 Monday           5164.             0            0.781
## 5 Tuesday          4360.             0            0.758
## 6 Wednesday        4268.             6            0.797

The analysis shows that Saturday and Thursday have the highest overtime for Team 4. This likely results from increased workload, higher idle time, and lower productivity. To reduce overtime, it’s important to focus on improving task distribution, reducing idle time, and boosting productivity, especially on these high-overtime days.

# Investigate Overtime by Task Dependencies and Team Coordination
task_dependency_investigation <- combined_df %>%
  filter(team %in% c(4)) %>%  
  group_by(team, department) %>%
  summarize(
    avg_overtime = mean(over_time, na.rm = TRUE),
    avg_productivity = mean(actual_productivity, na.rm = TRUE),
    total_tasks = n()  
  ) %>%
  arrange(desc(avg_overtime))
## `summarise()` has grouped output by 'team'. You can override using the
## `.groups` argument.
print(task_dependency_investigation)
## # A tibble: 3 × 5
## # Groups:   team [1]
##    team department   avg_overtime avg_productivity total_tasks
##   <int> <chr>               <dbl>            <dbl>       <int>
## 1     4 "sweing"            7684.            0.737         171
## 2     4 "finishing "        3570.            0.892          65
## 3     4 "finishing"         1335.            0.785          55

The analysis show that the increase in overtime for Team 4 may be due to the imbalance in task distribution between departments, particularly the Sewing department. The sewing department handled a significantly higher number of tasks (171) compared to the finishing department (55), which could be putting extra pressure on the sewing team, leading to higher overtime hours. Further investigation is needed to determine if other factors, such as task complexity, inefficiencies, or staffing issues in the sewing department, are contributing to the increase in overtime. Additionally, understanding the coordination between the two departments could help in optimizing workload distribution and reducing overtime.

# Investigate overtime by department and team
team_comparison_investigation <- combined_df %>%
  group_by(department, team) %>%
  summarize(
    avg_no_of_workers = mean(no_of_workers, na.rm = TRUE),
    avg_overtime = mean(over_time, na.rm = TRUE),
    avg_idle_time = mean(idle_time, na.rm = TRUE),
    avg_productivity = mean(actual_productivity, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_overtime)) 
## `summarise()` has grouped output by 'department'. You can override using the
## `.groups` argument.
print(team_comparison_investigation)
## # A tibble: 36 × 6
## # Groups:   department [3]
##    department  team avg_no_of_workers avg_overtime avg_idle_time
##    <chr>      <int>             <dbl>        <dbl>         <dbl>
##  1 sweing         4              57.4        7684.         1.75 
##  2 sweing         5              56.8        7406.         0.732
##  3 sweing         3              56.8        7016.         0    
##  4 sweing         1              57.5        7000.         0    
##  5 sweing         9              55.8        6928.         0    
##  6 sweing         8              56.6        6900.         2.11 
##  7 sweing         7              57.5        6845.         8.62 
##  8 sweing         2              55.9        6817.         0.210
##  9 sweing        10              54.1        6702.         0.397
## 10 sweing        11              54.3        5817.         0.156
## # ℹ 26 more rows
## # ℹ 1 more variable: avg_productivity <dbl>

Team 4 in the sewing department has high overtime and idle time, but its productivity is not the highest. This means that despite working longer hours, the team is not accomplishing more during that time. The high idle time indicates that workers are available but not always fully engaged, which reduces productivity. Since productivity is not higher, Team 4 has to work more overtime to compensate for the imbalance and meet production goals. This suggests that extra hours are being spent to make up for inefficiencies or delays, as seen in the higher overtime and idle time.

# Analyze dependencies by grouping tasks and idle times
task_dependency_investigation <- combined_df %>%
  group_by(team, department) %>%
  summarize(
    total_tasks = n(),
    avg_idle_time = mean(idle_time, na.rm = TRUE),
    avg_overtime = mean(over_time, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_idle_time))
## `summarise()` has grouped output by 'team'. You can override using the
## `.groups` argument.
print(task_dependency_investigation)
## # A tibble: 36 × 5
## # Groups:   team [12]
##     team department   total_tasks avg_idle_time avg_overtime
##    <int> <chr>              <int>         <dbl>        <dbl>
##  1     7 "sweing"             129         8.62         6845.
##  2     8 "sweing"             160         2.11         6900.
##  3     4 "sweing"             171         1.75         7684.
##  4     5 "sweing"             140         0.732        7406.
##  5    10 "sweing"             121         0.397        6702.
##  6     2 "sweing"             124         0.210        6817.
##  7    11 "sweing"             154         0.156        5817.
##  8     1 "finishing"           56         0            1611.
##  9     1 "finishing "          57         0            1985.
## 10     1 "sweing"             135         0            7000.
## # ℹ 26 more rows

The analysis shows that the higher overtime for Team 4 is driven by two main factors:

Higher Number of Tasks: Team 4 in the sewing department is handling a higher number of tasks (171) compared to other teams. This heavier workload can lead to increased overtime to meet production targets.

Higher Idle Time: Despite having many tasks, Team 4 experiences higher idle time (1.75 hours). This suggests that workers are available but not always fully utilized, creating inefficiencies that further require overtime to complete tasks.

Addressing these two issues—optimizing task distribution and reducing idle time—could help

# Analyze team coordination and resource allocation
coordination_allocation_analysis <- combined_df %>%
  filter(team %in% c(4)) %>%  
  group_by(team) %>%
  summarize(
    avg_no_of_workers = mean(no_of_workers, na.rm = TRUE),
    total_tasks = n(),
    avg_idle_time = mean(idle_time, na.rm = TRUE),
    avg_overtime = mean(over_time, na.rm = TRUE),
    avg_actual_productivity = mean(actual_productivity, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_overtime))

print(coordination_allocation_analysis)
## # A tibble: 1 × 6
##    team avg_no_of_workers total_tasks avg_idle_time avg_overtime
##   <int>             <dbl>       <int>         <dbl>        <dbl>
## 1     4              39.1         291          1.03        5565.
## # ℹ 1 more variable: avg_actual_productivity <dbl>

The results show that Thursday and Saturday have the highest average overtime, indicating potential issues with workload distribution or task dependencies on these days. Further investigation is needed to identify the causes, such as unplanned workload increases, task dependencies, staffing shortages, or delays in coordination with other departments. Understanding these factors will help develop strategies for better scheduling, task redistribution, and resource planning to reduce overtime.

# Analyze dependencies by grouping tasks and idle times
task_dependency_investigation <- combined_df %>%
  group_by(team, department) %>%
  summarize(
    total_tasks = n(),
    avg_idle_time = mean(idle_time, na.rm = TRUE),
    avg_overtime = mean(over_time, na.rm = TRUE)
  ) %>%
  arrange(desc(avg_idle_time))
## `summarise()` has grouped output by 'team'. You can override using the
## `.groups` argument.
print(task_dependency_investigation)
## # A tibble: 36 × 5
## # Groups:   team [12]
##     team department   total_tasks avg_idle_time avg_overtime
##    <int> <chr>              <int>         <dbl>        <dbl>
##  1     7 "sweing"             129         8.62         6845.
##  2     8 "sweing"             160         2.11         6900.
##  3     4 "sweing"             171         1.75         7684.
##  4     5 "sweing"             140         0.732        7406.
##  5    10 "sweing"             121         0.397        6702.
##  6     2 "sweing"             124         0.210        6817.
##  7    11 "sweing"             154         0.156        5817.
##  8     1 "finishing"           56         0            1611.
##  9     1 "finishing "          57         0            1985.
## 10     1 "sweing"             135         0            7000.
## # ℹ 26 more rows

The analysis shows that the higher overtime for Team 4 is driven by two main factors:

Higher Number of Tasks: Team 4 in the sewing department is handling a higher number of tasks (171) compared to other teams. This heavier workload can lead to increased overtime to meet production targets.

Higher Idle Time: Despite having many tasks, Team 4 experiences higher idle time (1.75 hours). This suggests that workers are available but not always fully utilized, creating inefficiencies that further require overtime to complete tasks.

Addressing these two issues—optimizing task distribution and reducing idle time—could help

Monte Carlo simulations

Monte Carlo simulations could be a useful tool to model the uncertainties in the factors contributing to overtime, such as task distribution, idle time, and productivity. By using Monte Carlo simulations, we can simulate different scenarios and assess the potential impact of various adjustments on Team 4’s overtime.

library(dplyr)
library(ggplot2)
mean_overtime <- mean(combined_df$over_time, na.rm = TRUE)
mean_idle_time <- mean(combined_df$idle_time, na.rm = TRUE)
mean_productivity <- mean(combined_df$actual_productivity, na.rm = TRUE)
sd_overtime <- sd(combined_df$over_time, na.rm = TRUE)
sd_idle_time <- sd(combined_df$idle_time, na.rm = TRUE)
sd_productivity <- sd(combined_df$actual_productivity, na.rm = TRUE)
print(mean_overtime)
## [1] 4540.161
print(mean_idle_time)
## [1] 0.6525084
print(mean_productivity)
## [1] 0.7311311
print(sd_overtime)
## [1] 3350.28
print(sd_idle_time)
## [1] 12.05551
print(sd_productivity)
## [1] 0.1769174
# Define the number of simulations
num_simulations <- 10000
set.seed(123) 

simulated_overtime <- replicate(num_simulations,
  {
  overtime_sim <- rnorm(1, mean_overtime, sd_overtime)
  idle_time_sim <- rnorm(1, mean_idle_time, sd_idle_time)
  productivity_sim <- rnorm(1, mean_productivity, sd_productivity)
  
  # Create a simple model: overtime depends on idle time and productivity
  overtime_sim + (idle_time_sim * 0.5) - (productivity_sim * 0.2)  # Modify weights as needed
})
summary(simulated_overtime)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -8355    2200    4479    4516    6840   17683
# Plot the simulation results
ggplot(data.frame(simulated_overtime), aes(x = simulated_overtime)) +
  geom_histogram(binwidth = 1000, fill = "skyblue", color = "black", alpha = 0.7) +
  labs(title = "Monte Carlo Simulation of Overtime for Team 4",
       x = "Simulated Overtime (Minutes)", y = "Frequency") +
  theme_minimal()

Team 4 shows high overtime with an average of 4540.16 minutes, occasionally reaching up to 17,683 minutes, indicating periods of heavy workload. While the idle time is generally low (average of 0.65 minutes), there are occasional spikes (up to 2.99 minutes), suggesting periods of inefficiency. Addressing workload distribution and idle time could help reduce overtime and improve productivity.