# Load the garment-worker productivity dataset and show its column structure.
# NOTE(review): the path below is absolute and specific to one machine.
data <- read.csv(
  file = "C:\\Users\\Krishna\\Downloads\\productivity+prediction+of+garment+employees\\garments_worker_productivity.csv"
)
str(object = data)
## 'data.frame': 1197 obs. of 15 variables:
## $ date : chr "01-01-2015" "01-01-2015" "01-01-2015" "01-01-2015" ...
## $ quarter : chr "Quarter1" "Quarter1" "Quarter1" "Quarter1" ...
## $ department : chr "sweing" "finishing " "sweing" "sweing" ...
## $ day : chr "Thursday" "Thursday" "Thursday" "Thursday" ...
## $ team : int 8 1 11 12 6 7 2 3 2 1 ...
## $ targeted_productivity: num 0.8 0.75 0.8 0.8 0.8 0.8 0.75 0.75 0.75 0.75 ...
## $ smv : num 26.16 3.94 11.41 11.41 25.9 ...
## $ wip : int 1108 NA 968 968 1170 984 NA 795 733 681 ...
## $ over_time : int 7080 960 3660 3660 1920 6720 960 6900 6000 6900 ...
## $ incentive : int 98 0 50 50 50 38 0 45 34 45 ...
## $ idle_time : num 0 0 0 0 0 0 0 0 0 0 ...
## $ idle_men : int 0 0 0 0 0 0 0 0 0 0 ...
## $ no_of_style_change : int 0 0 0 0 0 0 0 0 0 0 ...
## $ no_of_workers : num 59 8 30.5 30.5 56 56 8 57.5 55 57.5 ...
## $ actual_productivity : num 0.941 0.886 0.801 0.801 0.8 ...
# Fix the random-number generator state so every random draw is repeatable
set.seed(seed = 42)
#' Draw a random subsample of rows from a data frame
#'
#' Rows are drawn with replacement, so the same row can appear more than
#' once in the result.
#'
#' @param data A data frame to sample rows from.
#' @param sample_fraction A single non-negative number; the subsample has
#'   `round(sample_fraction * nrow(data))` rows.
#' @return A data frame holding the sampled rows, with the same columns as
#'   `data`.
create_subsample <- function(data, sample_fraction) {
  stopifnot(
    is.numeric(sample_fraction),
    length(sample_fraction) == 1L,
    !is.na(sample_fraction),
    sample_fraction >= 0
  )
  n_rows <- nrow(data)
  sample_size <- round(sample_fraction * n_rows)
  # sample.int() yields the same indices as sample(1:n_rows, ...) for a given
  # seed, without hand-building the 1:n_rows sequence.
  random_indices <- sample.int(n_rows, size = sample_size, replace = TRUE)
  # drop = FALSE keeps the result a data frame even when `data` has only one
  # column (plain `data[i, ]` would collapse it to a vector).
  data[random_indices, , drop = FALSE]
}
# Create 5 subsamples, each half the size of the full dataset
n_subsamples <- 5L
subsample_list <- lapply(
  seq_len(n_subsamples),
  function(i) create_subsample(data, 0.5)
)
# head() shows 6 rows by default, so the preview size is stated once and
# shared by the heading and the head() call to keep the two in agreement.
n_preview <- 6L
cat("First", n_preview, "rows of each subsample:\n")
## First 6 rows of each subsample:
for (i in seq_along(subsample_list)) {
  print(head(subsample_list[[i]], n = n_preview))
}
## date quarter department day team targeted_productivity smv
## 561 02-01-2015 Quarter1 finishing Sunday 6 0.60 2.90
## 321 1/19/2015 Quarter3 finishing Monday 5 0.65 4.15
## 1177 03-11-2015 Quarter2 finishing Wednesday 4 0.75 3.94
## 1098 03-07-2015 Quarter1 sweing Saturday 10 0.70 21.82
## 1170 03-10-2015 Quarter2 sweing Tuesday 7 0.60 30.48
## 634 02-05-2015 Quarter1 sweing Thursday 7 0.07 24.26
## wip over_time incentive idle_time idle_men no_of_style_change
## 561 NA 1200 0 0 0 0
## 321 NA 1440 0 0 0 0
## 1177 NA 1920 0 0 0 0
## 1098 1251 6000 30 0 0 0
## 1170 1017 6840 25 0 0 1
## 634 1608 6960 0 0 0 0
## no_of_workers actual_productivity
## 561 10 0.3141667
## 321 8 0.9589015
## 1177 8 0.8133090
## 1098 50 0.7004220
## 1170 57 0.6304029
## 634 58 0.5228448
## date quarter department day team targeted_productivity smv
## 17 01-01-2015 Quarter1 finishing Thursday 7 0.8 2.90
## 482 1/28/2015 Quarter4 finishing Wednesday 2 0.8 3.94
## 532 1/31/2015 Quarter5 finishing Saturday 6 0.6 3.94
## 76 01-05-2015 Quarter1 sweing Monday 4 0.8 28.08
## 1112 03-08-2015 Quarter2 finishing Sunday 2 0.7 3.90
## 998 03-01-2015 Quarter1 finishing Sunday 4 0.6 3.94
## wip over_time incentive idle_time idle_men no_of_style_change
## 17 NA 960 0 0 0 0
## 482 NA 1800 0 0 0 0
## 532 NA 1200 0 0 0 0
## 76 759 6900 50 0 0 0
## 1112 NA 1200 0 0 0 0
## 998 NA 960 0 0 0 0
## no_of_workers actual_productivity
## 17 8.0 0.5407292
## 482 15.0 0.9022222
## 532 10.0 0.9718667
## 76 57.5 0.8000765
## 1112 10.0 0.8840000
## 998 8.0 0.6730833
## date quarter department day team targeted_productivity smv
## 383 1/22/2015 Quarter4 finishing Thursday 4 0.75 4.30
## 353 1/21/2015 Quarter3 finishing Wednesday 4 0.75 4.30
## 383.1 1/22/2015 Quarter4 finishing Thursday 4 0.75 4.30
## 155 01-10-2015 Quarter2 sweing Saturday 8 0.80 25.90
## 204 01-12-2015 Quarter2 finishing Monday 5 0.65 4.15
## 760 2/14/2015 Quarter2 sweing Saturday 8 0.70 30.10
## wip over_time incentive idle_time idle_men no_of_style_change
## 383 NA 9000 0 0 0 0
## 353 NA 6000 0 0 0 0
## 383.1 NA 9000 0 0 0 0
## 155 1158 10170 60 0 0 0
## 204 NA 1440 0 0 0 0
## 760 824 5160 0 0 0 1
## no_of_workers actual_productivity
## 383 15.0 0.6702160
## 353 20.0 0.9785256
## 383.1 15.0 0.6702160
## 155 56.5 0.8501368
## 204 8.0 0.8095644
## 760 60.0 0.7002061
## date quarter department day team targeted_productivity smv
## 980 2/28/2015 Quarter4 sweing Saturday 1 0.50 26.66
## 100 01-06-2015 Quarter1 sweing Tuesday 10 0.75 28.08
## 1004 03-01-2015 Quarter1 finishing Sunday 9 0.75 2.90
## 265 1/15/2015 Quarter3 sweing Thursday 1 0.80 26.16
## 1092 03-07-2015 Quarter1 finishing Saturday 5 0.35 3.94
## 351 1/20/2015 Quarter3 sweing Tuesday 7 0.35 22.94
## wip over_time incentive idle_time idle_men no_of_style_change
## 980 1448 6840 30 0 0 2
## 100 1209 10530 45 0 0 0
## 1004 NA 960 0 0 0 0
## 265 1160 10620 75 0 0 0
## 1092 NA 2400 0 0 0 0
## 351 1450 10080 30 0 0 0
## no_of_workers actual_productivity
## 980 57.0 0.5503497
## 100 58.5 0.7505455
## 1004 8.0 0.4470833
## 265 59.0 0.8505023
## 1092 10.0 0.8057500
## 351 56.0 0.4003328
## date quarter department day team targeted_productivity smv
## 1179 03-11-2015 Quarter2 sweing Wednesday 12 0.80 15.26
## 426 1/25/2015 Quarter4 sweing Sunday 3 0.75 22.52
## 1007 03-02-2015 Quarter1 finishing Monday 3 0.80 4.60
## 632 02-05-2015 Quarter1 sweing Thursday 6 0.70 18.79
## 532 1/31/2015 Quarter5 finishing Saturday 6 0.60 3.94
## 363 1/21/2015 Quarter3 sweing Wednesday 3 0.70 22.52
## wip over_time incentive idle_time idle_men no_of_style_change
## 1179 470 4080 63 0 0 0
## 426 1495 10350 55 0 0 0
## 1007 NA 10080 0 0 0 0
## 632 766 2760 30 0 0 0
## 532 NA 1200 0 0 0 0
## 363 1139 10260 63 0 0 0
## no_of_workers actual_productivity
## 1179 34.0 0.8004020
## 426 57.5 0.8005133
## 1007 24.0 0.8603704
## 632 33.0 0.7000790
## 532 10.0 0.9718667
## 363 57.0 0.7500680
#' Summarise one subsample and resample the mean of a numeric column
#'
#' Prints `summary()` of the subsample, prints a count table of every
#' quarter/department combination, then repeatedly resamples one column
#' with replacement and draws a histogram of the resampled means.
#'
#' @param subsample Data frame with `quarter` and `department` columns plus
#'   the column named by `column_to_simulate`.
#' @param subsample_number Label printed in the heading for this subsample.
#' @param column_to_simulate Name of the numeric column whose mean is
#'   resampled. Defaults to "actual_productivity".
#' @param num_simulations Number of resampled means to compute. Defaults to
#'   1000.
#' @return The "histogram" object returned by `hist()`.
analyze_subsample <- function(subsample, subsample_number,
                              column_to_simulate = "actual_productivity",
                              num_simulations = 1000) {
  values <- subsample[[column_to_simulate]]
  if (!is.numeric(values) || length(values) == 0L) {
    stop(
      "Column '", column_to_simulate, "' must be a non-empty numeric column",
      call. = FALSE
    )
  }

  cat("Analysis for Subsample", subsample_number, "\n")

  # Display summary statistics
  print(summary(subsample))

  # Count rows for every quarter/department combination so that unexpected
  # category labels show up as their own rows.
  anomalies <- as.data.frame(
    table(subsample[["quarter"]], subsample[["department"]])
  )
  colnames(anomalies) <- c("quarter", "department", "Count")
  cat("Anomalies in Quarter and Department:\n")
  print(anomalies)

  # Resample by index: sample(x) would treat a single value x >= 1 as the
  # range 1:x, giving wrong draws for a one-row subsample. For longer
  # vectors the draws are identical to sample(values, replace = TRUE).
  n_values <- length(values)
  simulated_means <- replicate(
    num_simulations,
    mean(values[sample.int(n_values, size = n_values, replace = TRUE)])
  )
  cat("Monte Carlo Simulation for", column_to_simulate, "Mean:\n")
  hist(
    simulated_means,
    main = paste("Distribution of Simulated Means for", column_to_simulate)
  )
}
# Analyze each subsample. Iterating over the list itself keeps the loop in
# step with however many subsamples exist. The list of histogram objects
# returned by lapply() is auto-printed at top level.
lapply(
  seq_along(subsample_list),
  function(i) analyze_subsample(subsample_list[[i]], i)
)
## Analysis for Subsample 1
## date quarter department day
## Length:598 Length:598 Length:598 Length:598
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## team targeted_productivity smv wip
## Min. : 1.000 Min. :0.0700 Min. : 2.90 Min. : 7
## 1st Qu.: 3.000 1st Qu.:0.7000 1st Qu.: 3.94 1st Qu.: 767
## Median : 6.000 Median :0.7500 Median :14.89 Median : 1015
## Mean : 6.328 Mean :0.7255 Mean :14.75 Mean : 1104
## 3rd Qu.: 9.000 3rd Qu.:0.8000 3rd Qu.:23.54 3rd Qu.: 1246
## Max. :12.000 Max. :0.8000 Max. :49.10 Max. :21540
## NA's :263
## over_time incentive idle_time idle_men
## Min. : 0 Min. : 0.00 Min. : 0.0000 Min. : 0.0000
## 1st Qu.: 1440 1st Qu.: 0.00 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median : 3480 Median : 0.00 Median : 0.0000 Median : 0.0000
## Mean : 4382 Mean : 44.63 Mean : 0.3244 Mean : 0.3679
## 3rd Qu.: 6840 3rd Qu.: 50.00 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :13800 Max. :3600.00 Max. :150.0000 Max. :35.0000
##
## no_of_style_change no_of_workers actual_productivity
## Min. :0.0000 Min. : 2.00 Min. :0.2337
## 1st Qu.:0.0000 1st Qu.: 9.00 1st Qu.:0.6501
## Median :0.0000 Median :34.00 Median :0.7958
## Mean :0.1605 Mean :33.95 Mean :0.7346
## 3rd Qu.:0.0000 3rd Qu.:57.00 3rd Qu.:0.8503
## Max. :2.0000 Max. :89.00 Max. :1.1204
##
## Anomalies in Quarter and Department:
## quarter department Count
## 1 Quarter1 finishing 36
## 2 Quarter2 finishing 52
## 3 Quarter3 finishing 19
## 4 Quarter4 finishing 32
## 5 Quarter5 finishing 0
## 6 Quarter1 finishing 35
## 7 Quarter2 finishing 28
## 8 Quarter3 finishing 21
## 9 Quarter4 finishing 23
## 10 Quarter5 finishing 17
## 11 Quarter1 sweing 90
## 12 Quarter2 sweing 106
## 13 Quarter3 sweing 56
## 14 Quarter4 sweing 71
## 15 Quarter5 sweing 12
## Monte Carlo Simulation for actual_productivity Mean:
## Analysis for Subsample 2
## date quarter department day
## Length:598 Length:598 Length:598 Length:598
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## team targeted_productivity smv wip
## Min. : 1.000 Min. :0.3500 Min. : 2.90 Min. : 10.0
## 1st Qu.: 4.000 1st Qu.:0.7000 1st Qu.: 3.94 1st Qu.: 766.5
## Median : 6.000 Median :0.7500 Median :16.75 Median : 1052.0
## Mean : 6.462 Mean :0.7343 Mean :15.58 Mean : 1147.4
## 3rd Qu.: 9.000 3rd Qu.:0.8000 3rd Qu.:25.90 3rd Qu.: 1228.0
## Max. :12.000 Max. :0.8000 Max. :54.56 Max. :21266.0
## NA's :239
## over_time incentive idle_time idle_men
## Min. : 0 Min. : 0.00 Min. : 0.0000 Min. : 0.0000
## 1st Qu.: 1440 1st Qu.: 0.00 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median : 4080 Median : 23.00 Median : 0.0000 Median : 0.0000
## Mean : 4643 Mean : 43.22 Mean : 0.9916 Mean : 0.3094
## 3rd Qu.: 6960 3rd Qu.: 50.00 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :25920 Max. :2880.00 Max. :270.0000 Max. :45.0000
##
## no_of_style_change no_of_workers actual_productivity
## Min. :0.0000 Min. : 2.00 Min. :0.2462
## 1st Qu.:0.0000 1st Qu.: 9.00 1st Qu.:0.6516
## Median :0.0000 Median :34.00 Median :0.7663
## Mean :0.1405 Mean :35.24 Mean :0.7302
## 3rd Qu.:0.0000 3rd Qu.:57.00 3rd Qu.:0.8501
## Max. :2.0000 Max. :60.00 Max. :1.1204
##
## Anomalies in Quarter and Department:
## quarter department Count
## 1 Quarter1 finishing 35
## 2 Quarter2 finishing 36
## 3 Quarter3 finishing 22
## 4 Quarter4 finishing 22
## 5 Quarter5 finishing 0
## 6 Quarter1 finishing 35
## 7 Quarter2 finishing 30
## 8 Quarter3 finishing 20
## 9 Quarter4 finishing 28
## 10 Quarter5 finishing 11
## 11 Quarter1 sweing 128
## 12 Quarter2 sweing 98
## 13 Quarter3 sweing 53
## 14 Quarter4 sweing 73
## 15 Quarter5 sweing 7
## Monte Carlo Simulation for actual_productivity Mean:
## Analysis for Subsample 3
## date quarter department day
## Length:598 Length:598 Length:598 Length:598
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## team targeted_productivity smv wip
## Min. : 1.000 Min. :0.3500 Min. : 2.90 Min. : 7
## 1st Qu.: 3.000 1st Qu.:0.7000 1st Qu.: 3.94 1st Qu.: 784
## Median : 6.000 Median :0.7500 Median :15.26 Median : 1035
## Mean : 6.308 Mean :0.7258 Mean :15.38 Mean : 1226
## 3rd Qu.: 9.000 3rd Qu.:0.8000 3rd Qu.:24.26 3rd Qu.: 1263
## Max. :12.000 Max. :0.8000 Max. :54.56 Max. :21540
## NA's :249
## over_time incentive idle_time idle_men
## Min. : 0 Min. : 0.00 Min. :0.00000 Min. : 0.0000
## 1st Qu.: 1440 1st Qu.: 0.00 1st Qu.:0.00000 1st Qu.: 0.0000
## Median : 4080 Median : 23.00 Median :0.00000 Median : 0.0000
## Mean : 4640 Mean : 35.68 Mean :0.03345 Mean : 0.1839
## 3rd Qu.: 6960 3rd Qu.: 50.00 3rd Qu.:0.00000 3rd Qu.: 0.0000
## Max. :25920 Max. :3600.00 Max. :5.00000 Max. :30.0000
##
## no_of_style_change no_of_workers actual_productivity
## Min. :0.0000 Min. : 2.0 Min. :0.2380
## 1st Qu.:0.0000 1st Qu.:10.0 1st Qu.:0.6520
## Median :0.0000 Median :34.0 Median :0.7782
## Mean :0.1589 Mean :34.9 Mean :0.7331
## 3rd Qu.:0.0000 3rd Qu.:57.0 3rd Qu.:0.8501
## Max. :2.0000 Max. :60.0 Max. :1.0336
##
## Anomalies in Quarter and Department:
## quarter department Count
## 1 Quarter1 finishing 40
## 2 Quarter2 finishing 25
## 3 Quarter3 finishing 19
## 4 Quarter4 finishing 20
## 5 Quarter5 finishing 0
## 6 Quarter1 finishing 27
## 7 Quarter2 finishing 47
## 8 Quarter3 finishing 26
## 9 Quarter4 finishing 31
## 10 Quarter5 finishing 14
## 11 Quarter1 sweing 110
## 12 Quarter2 sweing 87
## 13 Quarter3 sweing 65
## 14 Quarter4 sweing 75
## 15 Quarter5 sweing 12
## Monte Carlo Simulation for actual_productivity Mean:
## Analysis for Subsample 4
## date quarter department day
## Length:598 Length:598 Length:598 Length:598
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## team targeted_productivity smv wip
## Min. : 1.000 Min. :0.3500 Min. : 2.90 Min. : 10.0
## 1st Qu.: 4.000 1st Qu.:0.7000 1st Qu.: 4.08 1st Qu.: 761.8
## Median : 7.000 Median :0.7500 Median :15.26 Median : 1042.0
## Mean : 6.736 Mean :0.7296 Mean :15.20 Mean : 1245.6
## 3rd Qu.:10.000 3rd Qu.:0.8000 3rd Qu.:24.26 3rd Qu.: 1263.2
## Max. :12.000 Max. :0.8000 Max. :50.89 Max. :21540.0
## NA's :240
## over_time incentive idle_time idle_men
## Min. : 0 Min. : 0.00 Min. : 0.0000 Min. : 0.0000
## 1st Qu.: 1440 1st Qu.: 0.00 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median : 4080 Median : 23.00 Median : 0.0000 Median : 0.0000
## Mean : 4646 Mean : 36.44 Mean : 0.5418 Mean : 0.2542
## 3rd Qu.: 6960 3rd Qu.: 50.00 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :25920 Max. :3600.00 Max. :300.0000 Max. :37.0000
##
## no_of_style_change no_of_workers actual_productivity
## Min. :0.0000 Min. : 2.00 Min. :0.2580
## 1st Qu.:0.0000 1st Qu.: 9.00 1st Qu.:0.6502
## Median :0.0000 Median :34.00 Median :0.7507
## Mean :0.1505 Mean :35.03 Mean :0.7264
## 3rd Qu.:0.0000 3rd Qu.:57.00 3rd Qu.:0.8193
## Max. :2.0000 Max. :60.00 Max. :1.1204
##
## Anomalies in Quarter and Department:
## quarter department Count
## 1 Quarter1 finishing 36
## 2 Quarter2 finishing 33
## 3 Quarter3 finishing 18
## 4 Quarter4 finishing 31
## 5 Quarter5 finishing 0
## 6 Quarter1 finishing 44
## 7 Quarter2 finishing 18
## 8 Quarter3 finishing 18
## 9 Quarter4 finishing 30
## 10 Quarter5 finishing 12
## 11 Quarter1 sweing 117
## 12 Quarter2 sweing 89
## 13 Quarter3 sweing 68
## 14 Quarter4 sweing 74
## 15 Quarter5 sweing 10
## Monte Carlo Simulation for actual_productivity Mean:
## Analysis for Subsample 5
## date quarter department day
## Length:598 Length:598 Length:598 Length:598
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## team targeted_productivity smv wip
## Min. : 1.000 Min. :0.3500 Min. : 2.90 Min. : 10
## 1st Qu.: 3.000 1st Qu.:0.7000 1st Qu.: 3.94 1st Qu.: 749
## Median : 6.000 Median :0.7500 Median :15.26 Median : 1027
## Mean : 6.415 Mean :0.7324 Mean :15.27 Mean : 1195
## 3rd Qu.: 9.000 3rd Qu.:0.8000 3rd Qu.:24.26 3rd Qu.: 1255
## Max. :12.000 Max. :0.8000 Max. :51.02 Max. :23122
## NA's :253
## over_time incentive idle_time idle_men
## Min. : 0 Min. : 0.00 Min. : 0.0000 Min. : 0.0000
## 1st Qu.: 1440 1st Qu.: 0.00 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median : 3960 Median : 0.00 Median : 0.0000 Median : 0.0000
## Mean : 4583 Mean : 33.48 Mean : 0.2291 Mean : 0.3762
## 3rd Qu.: 6960 3rd Qu.: 50.00 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :25920 Max. :3600.00 Max. :90.0000 Max. :35.0000
##
## no_of_style_change no_of_workers actual_productivity
## Min. :0.0000 Min. : 2.00 Min. :0.2380
## 1st Qu.:0.0000 1st Qu.: 9.00 1st Qu.:0.6568
## Median :0.0000 Median :34.00 Median :0.7555
## Mean :0.1388 Mean :34.53 Mean :0.7334
## 3rd Qu.:0.0000 3rd Qu.:57.00 3rd Qu.:0.8396
## Max. :2.0000 Max. :60.00 Max. :1.1204
##
## Anomalies in Quarter and Department:
## quarter department Count
## 1 Quarter1 finishing 43
## 2 Quarter2 finishing 31
## 3 Quarter3 finishing 22
## 4 Quarter4 finishing 28
## 5 Quarter5 finishing 0
## 6 Quarter1 finishing 48
## 7 Quarter2 finishing 27
## 8 Quarter3 finishing 15
## 9 Quarter4 finishing 28
## 10 Quarter5 finishing 11
## 11 Quarter1 sweing 95
## 12 Quarter2 sweing 97
## 13 Quarter3 sweing 77
## 14 Quarter4 sweing 71
## 15 Quarter5 sweing 5
## Monte Carlo Simulation for actual_productivity Mean:
## [[1]]
## $breaks
## [1] 0.710 0.715 0.720 0.725 0.730 0.735 0.740 0.745 0.750 0.755 0.760 0.765
## [13] 0.770
##
## $counts
## [1] 7 17 69 173 257 224 172 59 20 1 0 1
##
## $density
## [1] 1.4 3.4 13.8 34.6 51.4 44.8 34.4 11.8 4.0 0.2 0.0 0.2
##
## $mids
## [1] 0.7125 0.7175 0.7225 0.7275 0.7325 0.7375 0.7425 0.7475 0.7525 0.7575
## [11] 0.7625 0.7675
##
## $xname
## [1] "simulated_means"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[2]]
## $breaks
## [1] 0.705 0.710 0.715 0.720 0.725 0.730 0.735 0.740 0.745 0.750
##
## $counts
## [1] 2 15 47 170 243 248 193 64 18
##
## $density
## [1] 0.4 3.0 9.4 34.0 48.6 49.6 38.6 12.8 3.6
##
## $mids
## [1] 0.7075 0.7125 0.7175 0.7225 0.7275 0.7325 0.7375 0.7425 0.7475
##
## $xname
## [1] "simulated_means"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[3]]
## $breaks
## [1] 0.705 0.710 0.715 0.720 0.725 0.730 0.735 0.740 0.745 0.750 0.755
##
## $counts
## [1] 2 3 24 84 208 294 226 121 30 8
##
## $density
## [1] 0.4 0.6 4.8 16.8 41.6 58.8 45.2 24.2 6.0 1.6
##
## $mids
## [1] 0.7075 0.7125 0.7175 0.7225 0.7275 0.7325 0.7375 0.7425 0.7475 0.7525
##
## $xname
## [1] "simulated_means"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[4]]
## $breaks
## [1] 0.700 0.705 0.710 0.715 0.720 0.725 0.730 0.735 0.740 0.745 0.750
##
## $counts
## [1] 4 7 44 121 212 290 219 71 26 6
##
## $density
## [1] 0.8 1.4 8.8 24.2 42.4 58.0 43.8 14.2 5.2 1.2
##
## $mids
## [1] 0.7025 0.7075 0.7125 0.7175 0.7225 0.7275 0.7325 0.7375 0.7425 0.7475
##
## $xname
## [1] "simulated_means"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[5]]
## $breaks
## [1] 0.700 0.705 0.710 0.715 0.720 0.725 0.730 0.735 0.740 0.745 0.750 0.755
##
## $counts
## [1] 1 0 2 17 77 202 295 224 140 36 6
##
## $density
## [1] 0.2 0.0 0.4 3.4 15.4 40.4 59.0 44.8 28.0 7.2 1.2
##
## $mids
## [1] 0.7025 0.7075 0.7125 0.7175 0.7225 0.7275 0.7325 0.7375 0.7425 0.7475
## [11] 0.7525
##
## $xname
## [1] "simulated_means"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
The sub-samples are likely to exhibit variations due to the random nature of the sampling process. These differences can manifest in various ways:
Distribution of Variables: Each sub-sample may have a slightly different distribution of both categorical and continuous variables compared to the others. For instance, one sub-sample might have a higher proportion of workers from a specific department, while another might have a different distribution of productivity metrics.
Summary Statistics: Summary statistics such as mean, median, standard deviation, and quartiles may vary across sub-samples for continuous variables. Similarly, frequencies and proportions of categories may differ for categorical variables.
Outliers: The presence and characteristics of outliers could differ among sub-samples. Some sub-samples may have extreme values that are not present in others, impacting the overall distribution and analysis.
An anomaly in one sub-sample may not necessarily be considered anomalous in another due to the inherent variability introduced by random sampling. Here are some scenarios illustrating this:
Low Productivity: In one sub-sample, a particularly low actual productivity value might be considered an anomaly if it significantly deviates from the typical range observed in that sub-sample. However, the same productivity level might be within the normal range for another sub-sample due to differences in workforce composition, working conditions, or other factors.
High Overtime: A sub-sample with higher-than-average overtime might not be anomalous if the nature of the work or the time period sampled requires extra hours. However, in a different sub-sample with a lower average overtime, the same level of overtime might be considered anomalous.
Unusual Distribution: Anomalies in the distribution of categorical variables, such as an unexpected spike in the frequency of a certain department or day of the week, may be context-dependent and not necessarily anomalies in other sub-samples.
Despite the variations, certain aspects of the data may remain consistent across all sub-samples. These consistent aspects can provide valuable insights into the overall characteristics of the dataset:
Overall Patterns: Despite minor fluctuations, overarching patterns in the data, such as the general distribution of productivity metrics or the relative frequencies of different departments, may be consistent across all sub-samples.
Common Trends: Trends observed in the entire dataset, such as seasonal variations or departmental performance trends, may also be evident in each sub-sample, albeit to varying degrees.
Central Tendency: Measures of central tendency, such as the mean or median, for key variables may exhibit relatively small variations across sub-samples, indicating a consistent underlying trend.
1) The histogram of simulated means illustrates the potential range of average productivity values that could be observed in different sub-samples.
2) The width and shape of the distribution indicate the variability in mean actual productivity across different simulated scenarios.