Load CSV file

Load the CSV file into the garment_prod variable.

# Read garment_prod.csv into a data frame.
garment_prod <- read.csv(
  "/Users/lakshmimounikab/Desktop/Stats with R/R practice/garment_prod.csv"
)
# Store `team` as character so summary() reports it as text, not as numbers.
garment_prod[["team"]] <- as.character(garment_prod[["team"]])
# Open the data in the viewer, then print per-column summaries.
View(garment_prod)
summary(garment_prod)
##      date             quarter           department            day           
##  Length:1197        Length:1197        Length:1197        Length:1197       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      team           targeted_productivity      smv             wip         
##  Length:1197        Min.   :0.0700        Min.   : 2.90   Min.   :    7.0  
##  Class :character   1st Qu.:0.7000        1st Qu.: 3.94   1st Qu.:  774.5  
##  Mode  :character   Median :0.7500        Median :15.26   Median : 1039.0  
##                     Mean   :0.7296        Mean   :15.06   Mean   : 1190.5  
##                     3rd Qu.:0.8000        3rd Qu.:24.26   3rd Qu.: 1252.5  
##                     Max.   :0.8000        Max.   :54.56   Max.   :23122.0  
##                                                           NA's   :506      
##    over_time       incentive         idle_time           idle_men      
##  Min.   :    0   Min.   :   0.00   Min.   :  0.0000   Min.   : 0.0000  
##  1st Qu.: 1440   1st Qu.:   0.00   1st Qu.:  0.0000   1st Qu.: 0.0000  
##  Median : 3960   Median :   0.00   Median :  0.0000   Median : 0.0000  
##  Mean   : 4567   Mean   :  38.21   Mean   :  0.7302   Mean   : 0.3693  
##  3rd Qu.: 6960   3rd Qu.:  50.00   3rd Qu.:  0.0000   3rd Qu.: 0.0000  
##  Max.   :25920   Max.   :3600.00   Max.   :300.0000   Max.   :45.0000  
##                                                                        
##  no_of_style_change no_of_workers   actual_productivity
##  Min.   :0.0000     Min.   : 2.00   Min.   :0.2337     
##  1st Qu.:0.0000     1st Qu.: 9.00   1st Qu.:0.6503     
##  Median :0.0000     Median :34.00   Median :0.7733     
##  Mean   :0.1504     Mean   :34.61   Mean   :0.7351     
##  3rd Qu.:0.0000     3rd Qu.:57.00   3rd Qu.:0.8503     
##  Max.   :2.0000     Max.   :89.00   Max.   :1.1204     
## 

Load required libraries

The required libraries are dplyr, purrr, and ggplot2; knitr is also used (via knitr::kable()) to format the summary tables.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(purrr)
library(ggplot2)

Static random sampling with aggregate values

In this method, we can select/pluck a particular column and perform aggregate functions on the sampled data.

# Draw a random sample half the size of the data, with replacement.
sample_1 <- sample_frac(garment_prod, 0.5, replace = TRUE)
View(sample_1)
# Aggregate individual columns pulled out of the sample.
mean(pluck(sample_1, "targeted_productivity"))
## [1] 0.727592
# NOTE(review): `wip` contains missing values, so max() returns NA here;
# max(..., na.rm = TRUE) would report the largest observed value instead.
max(pluck(sample_1, "wip"))
## [1] NA
mean(pluck(sample_1, "actual_productivity"))
## [1] 0.7260001

Random sampling of garment_prod dataset

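I randomly generated subsamples using seven columns: quarter, day, targeted_productivity, actual_productivity, smv, over_time, and no_of_workers. The number of subsamples is itself chosen at random between 5 and 10, and each subsample draws 50% of the rows with replacement.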

# Number of subsamples to draw: a single random integer between 5 and 10.
num <- sample(5:10, 1)
# Columns retained in every subsample.
columns <- c(
  "quarter", "day", "targeted_productivity", "actual_productivity",
  "smv", "over_time", "no_of_workers"
)
# Every subsample has the same size (half the rows), so compute it once.
n_rows <- nrow(garment_prod)
s_size <- round(0.5 * n_rows)
# Build the list in one pass instead of growing it inside a loop. Each element
# is a with-replacement draw of `s_size` rows restricted to `columns`.
subsample_list <- lapply(seq_len(num), function(i) {
  s_index <- sample(seq_len(n_rows), size = s_size, replace = TRUE)
  garment_prod[s_index, columns]
})

Summaries of the sample data

Print the summary statistics for each subsample created above, using a for-loop.

# Render each subsample's summary() as a captioned knitr table.
summary_table <- lapply(subsample_list, function(subsample) {
  knitr::kable(summary(subsample), caption = "Summary Statistics")
})
# Print the tables one by one, each under a numbered heading.
for (i in seq_len(num)) {
  cat("### Subsample", i, "summary statisics \n")
  print(summary_table[[i]])
}
## ### Subsample 1 summary statisics 
## 
## 
## Table: Summary Statistics
## 
## |   |  quarter        |    day          |targeted_productivity |actual_productivity |     smv      |  over_time   |no_of_workers |
## |:--|:----------------|:----------------|:---------------------|:-------------------|:-------------|:-------------|:-------------|
## |   |Length:598       |Length:598       |Min.   :0.3500        |Min.   :0.2337      |Min.   : 2.90 |Min.   :    0 |Min.   : 2.00 |
## |   |Class :character |Class :character |1st Qu.:0.7000        |1st Qu.:0.6542      |1st Qu.: 3.94 |1st Qu.: 1440 |1st Qu.: 9.00 |
## |   |Mode  :character |Mode  :character |Median :0.7500        |Median :0.7852      |Median :11.61 |Median : 3780 |Median :33.00 |
## |   |NA               |NA               |Mean   :0.7304        |Mean   :0.7405      |Mean   :14.60 |Mean   : 4472 |Mean   :33.69 |
## |   |NA               |NA               |3rd Qu.:0.8000        |3rd Qu.:0.8505      |3rd Qu.:22.94 |3rd Qu.: 6960 |3rd Qu.:57.00 |
## |   |NA               |NA               |Max.   :0.8000        |Max.   :1.1081      |Max.   :54.56 |Max.   :15000 |Max.   :60.00 |
## ### Subsample 2 summary statisics 
## 
## 
## Table: Summary Statistics
## 
## |   |  quarter        |    day          |targeted_productivity |actual_productivity |     smv      |  over_time   |no_of_workers |
## |:--|:----------------|:----------------|:---------------------|:-------------------|:-------------|:-------------|:-------------|
## |   |Length:598       |Length:598       |Min.   :0.0700        |Min.   :0.2358      |Min.   : 2.90 |Min.   :    0 |Min.   : 2.00 |
## |   |Class :character |Class :character |1st Qu.:0.7000        |1st Qu.:0.6548      |1st Qu.: 3.94 |1st Qu.: 1440 |1st Qu.: 9.00 |
## |   |Mode  :character |Mode  :character |Median :0.7500        |Median :0.7708      |Median :12.52 |Median : 3900 |Median :34.00 |
## |   |NA               |NA               |Mean   :0.7248        |Mean   :0.7375      |Mean   :14.69 |Mean   : 4530 |Mean   :34.52 |
## |   |NA               |NA               |3rd Qu.:0.8000        |3rd Qu.:0.8503      |3rd Qu.:22.94 |3rd Qu.: 6900 |3rd Qu.:57.00 |
## |   |NA               |NA               |Max.   :0.8000        |Max.   :1.0966      |Max.   :51.02 |Max.   :25920 |Max.   :60.00 |
## ### Subsample 3 summary statisics 
## 
## 
## Table: Summary Statistics
## 
## |   |  quarter        |    day          |targeted_productivity |actual_productivity |     smv      |  over_time   |no_of_workers |
## |:--|:----------------|:----------------|:---------------------|:-------------------|:-------------|:-------------|:-------------|
## |   |Length:598       |Length:598       |Min.   :0.3500        |Min.   :0.2358      |Min.   : 2.90 |Min.   :    0 |Min.   : 2.00 |
## |   |Class :character |Class :character |1st Qu.:0.7000        |1st Qu.:0.6497      |1st Qu.: 3.94 |1st Qu.: 1440 |1st Qu.: 9.00 |
## |   |Mode  :character |Mode  :character |Median :0.7500        |Median :0.7600      |Median :11.61 |Median : 3900 |Median :33.75 |
## |   |NA               |NA               |Mean   :0.7244        |Mean   :0.7302      |Mean   :14.40 |Mean   : 4505 |Mean   :33.67 |
## |   |NA               |NA               |3rd Qu.:0.8000        |3rd Qu.:0.8505      |3rd Qu.:22.52 |3rd Qu.: 6900 |3rd Qu.:57.00 |
## |   |NA               |NA               |Max.   :0.8000        |Max.   :1.1204      |Max.   :54.56 |Max.   :15120 |Max.   :89.00 |
## ### Subsample 4 summary statisics 
## 
## 
## Table: Summary Statistics
## 
## |   |  quarter        |    day          |targeted_productivity |actual_productivity |     smv      |  over_time   |no_of_workers |
## |:--|:----------------|:----------------|:---------------------|:-------------------|:-------------|:-------------|:-------------|
## |   |Length:598       |Length:598       |Min.   :0.3500        |Min.   :0.2473      |Min.   : 2.90 |Min.   :    0 |Min.   : 2.00 |
## |   |Class :character |Class :character |1st Qu.:0.7000        |1st Qu.:0.6501      |1st Qu.: 3.94 |1st Qu.: 1440 |1st Qu.: 9.00 |
## |   |Mode  :character |Mode  :character |Median :0.7500        |Median :0.7555      |Median :15.26 |Median : 4200 |Median :34.50 |
## |   |NA               |NA               |Mean   :0.7215        |Mean   :0.7288      |Mean   :15.23 |Mean   : 4654 |Mean   :35.34 |
## |   |NA               |NA               |3rd Qu.:0.8000        |3rd Qu.:0.8489      |3rd Qu.:24.26 |3rd Qu.: 6960 |3rd Qu.:57.00 |
## |   |NA               |NA               |Max.   :0.8000        |Max.   :1.1204      |Max.   :50.48 |Max.   :15000 |Max.   :60.00 |
## ### Subsample 5 summary statisics 
## 
## 
## Table: Summary Statistics
## 
## |   |  quarter        |    day          |targeted_productivity |actual_productivity |     smv      |  over_time   |no_of_workers |
## |:--|:----------------|:----------------|:---------------------|:-------------------|:-------------|:-------------|:-------------|
## |   |Length:598       |Length:598       |Min.   :0.0700        |Min.   :0.2473      |Min.   : 2.90 |Min.   :    0 |Min.   : 2.0  |
## |   |Class :character |Class :character |1st Qu.:0.7000        |1st Qu.:0.6629      |1st Qu.: 3.94 |1st Qu.: 1440 |1st Qu.: 9.0  |
## |   |Mode  :character |Mode  :character |Median :0.7500        |Median :0.7962      |Median :11.61 |Median : 3840 |Median :34.0  |
## |   |NA               |NA               |Mean   :0.7277        |Mean   :0.7403      |Mean   :14.71 |Mean   : 4512 |Mean   :33.7  |
## |   |NA               |NA               |3rd Qu.:0.8000        |3rd Qu.:0.8505      |3rd Qu.:23.29 |3rd Qu.: 6840 |3rd Qu.:57.0  |
## |   |NA               |NA               |Max.   :0.8000        |Max.   :1.0966      |Max.   :54.56 |Max.   :15120 |Max.   :60.0  |
## ### Subsample 6 summary statisics 
## 
## 
## Table: Summary Statistics
## 
## |   |  quarter        |    day          |targeted_productivity |actual_productivity |     smv      |  over_time   |no_of_workers |
## |:--|:----------------|:----------------|:---------------------|:-------------------|:-------------|:-------------|:-------------|
## |   |Length:598       |Length:598       |Min.   :0.0700        |Min.   :0.2337      |Min.   : 2.90 |Min.   :    0 |Min.   : 2.0  |
## |   |Class :character |Class :character |1st Qu.:0.7000        |1st Qu.:0.6501      |1st Qu.: 3.94 |1st Qu.: 1440 |1st Qu.: 9.0  |
## |   |Mode  :character |Mode  :character |Median :0.7500        |Median :0.7981      |Median :15.09 |Median : 3960 |Median :34.0  |
## |   |NA               |NA               |Mean   :0.7295        |Mean   :0.7348      |Mean   :14.93 |Mean   : 4587 |Mean   :34.2  |
## |   |NA               |NA               |3rd Qu.:0.8000        |3rd Qu.:0.8504      |3rd Qu.:24.26 |3rd Qu.: 6960 |3rd Qu.:57.0  |
## |   |NA               |NA               |Max.   :0.8000        |Max.   :1.1204      |Max.   :50.89 |Max.   :25920 |Max.   :60.0  |
## ### Subsample 7 summary statisics 
## 
## 
## Table: Summary Statistics
## 
## |   |  quarter        |    day          |targeted_productivity |actual_productivity |     smv      |  over_time   |no_of_workers |
## |:--|:----------------|:----------------|:---------------------|:-------------------|:-------------|:-------------|:-------------|
## |   |Length:598       |Length:598       |Min.   :0.0700        |Min.   :0.2337      |Min.   : 2.90 |Min.   :    0 |Min.   : 2.00 |
## |   |Class :character |Class :character |1st Qu.:0.7000        |1st Qu.:0.6507      |1st Qu.: 3.94 |1st Qu.: 1440 |1st Qu.:10.00 |
## |   |Mode  :character |Mode  :character |Median :0.7500        |Median :0.7961      |Median :15.26 |Median : 4080 |Median :34.00 |
## |   |NA               |NA               |Mean   :0.7239        |Mean   :0.7409      |Mean   :14.81 |Mean   : 4641 |Mean   :34.29 |
## |   |NA               |NA               |3rd Qu.:0.8000        |3rd Qu.:0.8505      |3rd Qu.:22.94 |3rd Qu.: 6960 |3rd Qu.:57.00 |
## |   |NA               |NA               |Max.   :0.8000        |Max.   :1.1005      |Max.   :51.02 |Max.   :15120 |Max.   :60.00 |
## ### Subsample 8 summary statisics 
## 
## 
## Table: Summary Statistics
## 
## |   |  quarter        |    day          |targeted_productivity |actual_productivity |     smv      |  over_time   |no_of_workers |
## |:--|:----------------|:----------------|:---------------------|:-------------------|:-------------|:-------------|:-------------|
## |   |Length:598       |Length:598       |Min.   :0.3500        |Min.   :0.2494      |Min.   : 2.90 |Min.   :    0 |Min.   : 4.00 |
## |   |Class :character |Class :character |1st Qu.:0.7000        |1st Qu.:0.6513      |1st Qu.: 3.94 |1st Qu.: 1440 |1st Qu.: 8.00 |
## |   |Mode  :character |Mode  :character |Median :0.7500        |Median :0.7688      |Median :15.26 |Median : 3780 |Median :34.00 |
## |   |NA               |NA               |Mean   :0.7303        |Mean   :0.7336      |Mean   :15.09 |Mean   : 4357 |Mean   :34.29 |
## |   |NA               |NA               |3rd Qu.:0.8000        |3rd Qu.:0.8500      |3rd Qu.:25.75 |3rd Qu.: 6840 |3rd Qu.:57.00 |
## |   |NA               |NA               |Max.   :0.8000        |Max.   :1.1204      |Max.   :54.56 |Max.   :15120 |Max.   :60.00 |
## ### Subsample 9 summary statisics 
## 
## 
## Table: Summary Statistics
## 
## |   |  quarter        |    day          |targeted_productivity |actual_productivity |     smv      |  over_time   |no_of_workers |
## |:--|:----------------|:----------------|:---------------------|:-------------------|:-------------|:-------------|:-------------|
## |   |Length:598       |Length:598       |Min.   :0.3500        |Min.   :0.2337      |Min.   : 2.90 |Min.   :    0 |Min.   : 2.00 |
## |   |Class :character |Class :character |1st Qu.:0.7000        |1st Qu.:0.6571      |1st Qu.: 3.94 |1st Qu.: 1440 |1st Qu.: 8.00 |
## |   |Mode  :character |Mode  :character |Median :0.7500        |Median :0.8000      |Median :14.75 |Median : 3540 |Median :34.00 |
## |   |NA               |NA               |Mean   :0.7301        |Mean   :0.7414      |Mean   :14.73 |Mean   : 4412 |Mean   :33.81 |
## |   |NA               |NA               |3rd Qu.:0.8000        |3rd Qu.:0.8506      |3rd Qu.:22.94 |3rd Qu.: 6960 |3rd Qu.:57.00 |
## |   |NA               |NA               |Max.   :0.8000        |Max.   :1.1204      |Max.   :54.56 |Max.   :25920 |Max.   :60.00 |

Scrutinizing/Analysing subsamples

To understand the variation within the sampled data set, I’ve chosen one column named “no_of_workers”. To examine the sub sample, I’ve chosen to use histograms.

# Summary statistics for each subsample.
summary_stats <- lapply(subsample_list, summary)
# One histogram of no_of_workers per subsample (one bin per worker count).
# The title and x-axis label name the column that is actually plotted.
histograms <- lapply(subsample_list, function(subsample) {
  ggplot(subsample, aes(x = no_of_workers)) +
    geom_histogram(binwidth = 1, fill = "blue", color = "black") +
    labs(
      title = "Histogram for no_of_workers",
      x = "Number of workers",
      y = "Frequency"
    )
})
# Display each subsample's summary followed by its histogram.
for (i in seq_len(num)) {
  cat("Subsample", i, "summary statistics:\n")
  print(summary_stats[[i]])
  print(histograms[[i]])
}
## Subsample 1 summary statistics:
##    quarter              day            targeted_productivity
##  Length:598         Length:598         Min.   :0.3500       
##  Class :character   Class :character   1st Qu.:0.7000       
##  Mode  :character   Mode  :character   Median :0.7500       
##                                        Mean   :0.7304       
##                                        3rd Qu.:0.8000       
##                                        Max.   :0.8000       
##  actual_productivity      smv          over_time     no_of_workers  
##  Min.   :0.2337      Min.   : 2.90   Min.   :    0   Min.   : 2.00  
##  1st Qu.:0.6542      1st Qu.: 3.94   1st Qu.: 1440   1st Qu.: 9.00  
##  Median :0.7852      Median :11.61   Median : 3780   Median :33.00  
##  Mean   :0.7405      Mean   :14.60   Mean   : 4472   Mean   :33.69  
##  3rd Qu.:0.8505      3rd Qu.:22.94   3rd Qu.: 6960   3rd Qu.:57.00  
##  Max.   :1.1081      Max.   :54.56   Max.   :15000   Max.   :60.00

## Subsample 2 summary statistics:
##    quarter              day            targeted_productivity
##  Length:598         Length:598         Min.   :0.0700       
##  Class :character   Class :character   1st Qu.:0.7000       
##  Mode  :character   Mode  :character   Median :0.7500       
##                                        Mean   :0.7248       
##                                        3rd Qu.:0.8000       
##                                        Max.   :0.8000       
##  actual_productivity      smv          over_time     no_of_workers  
##  Min.   :0.2358      Min.   : 2.90   Min.   :    0   Min.   : 2.00  
##  1st Qu.:0.6548      1st Qu.: 3.94   1st Qu.: 1440   1st Qu.: 9.00  
##  Median :0.7708      Median :12.52   Median : 3900   Median :34.00  
##  Mean   :0.7375      Mean   :14.69   Mean   : 4530   Mean   :34.52  
##  3rd Qu.:0.8503      3rd Qu.:22.94   3rd Qu.: 6900   3rd Qu.:57.00  
##  Max.   :1.0966      Max.   :51.02   Max.   :25920   Max.   :60.00

## Subsample 3 summary statistics:
##    quarter              day            targeted_productivity
##  Length:598         Length:598         Min.   :0.3500       
##  Class :character   Class :character   1st Qu.:0.7000       
##  Mode  :character   Mode  :character   Median :0.7500       
##                                        Mean   :0.7244       
##                                        3rd Qu.:0.8000       
##                                        Max.   :0.8000       
##  actual_productivity      smv          over_time     no_of_workers  
##  Min.   :0.2358      Min.   : 2.90   Min.   :    0   Min.   : 2.00  
##  1st Qu.:0.6497      1st Qu.: 3.94   1st Qu.: 1440   1st Qu.: 9.00  
##  Median :0.7600      Median :11.61   Median : 3900   Median :33.75  
##  Mean   :0.7302      Mean   :14.40   Mean   : 4505   Mean   :33.67  
##  3rd Qu.:0.8505      3rd Qu.:22.52   3rd Qu.: 6900   3rd Qu.:57.00  
##  Max.   :1.1204      Max.   :54.56   Max.   :15120   Max.   :89.00

## Subsample 4 summary statistics:
##    quarter              day            targeted_productivity
##  Length:598         Length:598         Min.   :0.3500       
##  Class :character   Class :character   1st Qu.:0.7000       
##  Mode  :character   Mode  :character   Median :0.7500       
##                                        Mean   :0.7215       
##                                        3rd Qu.:0.8000       
##                                        Max.   :0.8000       
##  actual_productivity      smv          over_time     no_of_workers  
##  Min.   :0.2473      Min.   : 2.90   Min.   :    0   Min.   : 2.00  
##  1st Qu.:0.6501      1st Qu.: 3.94   1st Qu.: 1440   1st Qu.: 9.00  
##  Median :0.7555      Median :15.26   Median : 4200   Median :34.50  
##  Mean   :0.7288      Mean   :15.23   Mean   : 4654   Mean   :35.34  
##  3rd Qu.:0.8489      3rd Qu.:24.26   3rd Qu.: 6960   3rd Qu.:57.00  
##  Max.   :1.1204      Max.   :50.48   Max.   :15000   Max.   :60.00

## Subsample 5 summary statistics:
##    quarter              day            targeted_productivity
##  Length:598         Length:598         Min.   :0.0700       
##  Class :character   Class :character   1st Qu.:0.7000       
##  Mode  :character   Mode  :character   Median :0.7500       
##                                        Mean   :0.7277       
##                                        3rd Qu.:0.8000       
##                                        Max.   :0.8000       
##  actual_productivity      smv          over_time     no_of_workers 
##  Min.   :0.2473      Min.   : 2.90   Min.   :    0   Min.   : 2.0  
##  1st Qu.:0.6629      1st Qu.: 3.94   1st Qu.: 1440   1st Qu.: 9.0  
##  Median :0.7962      Median :11.61   Median : 3840   Median :34.0  
##  Mean   :0.7403      Mean   :14.71   Mean   : 4512   Mean   :33.7  
##  3rd Qu.:0.8505      3rd Qu.:23.29   3rd Qu.: 6840   3rd Qu.:57.0  
##  Max.   :1.0966      Max.   :54.56   Max.   :15120   Max.   :60.0

## Subsample 6 summary statistics:
##    quarter              day            targeted_productivity
##  Length:598         Length:598         Min.   :0.0700       
##  Class :character   Class :character   1st Qu.:0.7000       
##  Mode  :character   Mode  :character   Median :0.7500       
##                                        Mean   :0.7295       
##                                        3rd Qu.:0.8000       
##                                        Max.   :0.8000       
##  actual_productivity      smv          over_time     no_of_workers 
##  Min.   :0.2337      Min.   : 2.90   Min.   :    0   Min.   : 2.0  
##  1st Qu.:0.6501      1st Qu.: 3.94   1st Qu.: 1440   1st Qu.: 9.0  
##  Median :0.7981      Median :15.09   Median : 3960   Median :34.0  
##  Mean   :0.7348      Mean   :14.93   Mean   : 4587   Mean   :34.2  
##  3rd Qu.:0.8504      3rd Qu.:24.26   3rd Qu.: 6960   3rd Qu.:57.0  
##  Max.   :1.1204      Max.   :50.89   Max.   :25920   Max.   :60.0

## Subsample 7 summary statistics:
##    quarter              day            targeted_productivity
##  Length:598         Length:598         Min.   :0.0700       
##  Class :character   Class :character   1st Qu.:0.7000       
##  Mode  :character   Mode  :character   Median :0.7500       
##                                        Mean   :0.7239       
##                                        3rd Qu.:0.8000       
##                                        Max.   :0.8000       
##  actual_productivity      smv          over_time     no_of_workers  
##  Min.   :0.2337      Min.   : 2.90   Min.   :    0   Min.   : 2.00  
##  1st Qu.:0.6507      1st Qu.: 3.94   1st Qu.: 1440   1st Qu.:10.00  
##  Median :0.7961      Median :15.26   Median : 4080   Median :34.00  
##  Mean   :0.7409      Mean   :14.81   Mean   : 4641   Mean   :34.29  
##  3rd Qu.:0.8505      3rd Qu.:22.94   3rd Qu.: 6960   3rd Qu.:57.00  
##  Max.   :1.1005      Max.   :51.02   Max.   :15120   Max.   :60.00

## Subsample 8 summary statistics:
##    quarter              day            targeted_productivity
##  Length:598         Length:598         Min.   :0.3500       
##  Class :character   Class :character   1st Qu.:0.7000       
##  Mode  :character   Mode  :character   Median :0.7500       
##                                        Mean   :0.7303       
##                                        3rd Qu.:0.8000       
##                                        Max.   :0.8000       
##  actual_productivity      smv          over_time     no_of_workers  
##  Min.   :0.2494      Min.   : 2.90   Min.   :    0   Min.   : 4.00  
##  1st Qu.:0.6513      1st Qu.: 3.94   1st Qu.: 1440   1st Qu.: 8.00  
##  Median :0.7688      Median :15.26   Median : 3780   Median :34.00  
##  Mean   :0.7336      Mean   :15.09   Mean   : 4357   Mean   :34.29  
##  3rd Qu.:0.8500      3rd Qu.:25.75   3rd Qu.: 6840   3rd Qu.:57.00  
##  Max.   :1.1204      Max.   :54.56   Max.   :15120   Max.   :60.00

## Subsample 9 summary statistics:
##    quarter              day            targeted_productivity
##  Length:598         Length:598         Min.   :0.3500       
##  Class :character   Class :character   1st Qu.:0.7000       
##  Mode  :character   Mode  :character   Median :0.7500       
##                                        Mean   :0.7301       
##                                        3rd Qu.:0.8000       
##                                        Max.   :0.8000       
##  actual_productivity      smv          over_time     no_of_workers  
##  Min.   :0.2337      Min.   : 2.90   Min.   :    0   Min.   : 2.00  
##  1st Qu.:0.6571      1st Qu.: 3.94   1st Qu.: 1440   1st Qu.: 8.00  
##  Median :0.8000      Median :14.75   Median : 3540   Median :34.00  
##  Mean   :0.7414      Mean   :14.73   Mean   : 4412   Mean   :33.81  
##  3rd Qu.:0.8506      3rd Qu.:22.94   3rd Qu.: 6960   3rd Qu.:57.00  
##  Max.   :1.1204      Max.   :54.56   Max.   :25920   Max.   :60.00

From the histograms, I observed that:

- The frequencies in each subsample range from about 50 up to 150 or 200.
- The overall variation is similar across all the subsamples; in other words, there is no large difference in variation between them.
- The peaks may appear to be at the same points, but their frequencies differ from one subsample to another.

Anomalies and consistency

To check for anomalies and consistency, I computed the mean and standard deviation of the “no_of_workers” column for each subsample.

# Per-subsample mean and standard deviation of no_of_workers, kept as lists.
means <- lapply(subsample_list, \(s) mean(s[["no_of_workers"]]))
sds <- lapply(subsample_list, \(s) sd(s[["no_of_workers"]]))
# Report both statistics for every subsample, one line per statistic.
for (i in seq_len(num)) {
  cat("Subsample", i, "Mean of Number of workers:", means[[i]], "\n")
  cat("Subsample", i, "SD of Number of workers:", sds[[i]], "\n")
}
## Subsample 1 Mean of Number of workers: 33.69398 
## Subsample 1 SD of Number of workers: 22.10367 
## Subsample 2 Mean of Number of workers: 34.51839 
## Subsample 2 SD of Number of workers: 22.21953 
## Subsample 3 Mean of Number of workers: 33.66555 
## Subsample 3 SD of Number of workers: 22.46121 
## Subsample 4 Mean of Number of workers: 35.33612 
## Subsample 4 SD of Number of workers: 22.00519 
## Subsample 5 Mean of Number of workers: 33.70485 
## Subsample 5 SD of Number of workers: 22.26616 
## Subsample 6 Mean of Number of workers: 34.199 
## Subsample 6 SD of Number of workers: 22.18205 
## Subsample 7 Mean of Number of workers: 34.29097 
## Subsample 7 SD of Number of workers: 21.93785 
## Subsample 8 Mean of Number of workers: 34.29264 
## Subsample 8 SD of Number of workers: 22.35408 
## Subsample 9 Mean of Number of workers: 33.80602 
## Subsample 9 SD of Number of workers: 22.51149

For targeted_productivity, I used the sum and variance aggregate functions.

# Per-subsample sum and variance of targeted_productivity, kept as lists.
sums <- lapply(subsample_list, \(s) sum(s[["targeted_productivity"]]))
vars <- lapply(subsample_list, \(s) var(s[["targeted_productivity"]]))
# Report both statistics for every subsample, one line per statistic.
for (i in seq_len(num)) {
  cat("Subsample", i, "Sum of targeted productivity:", sums[[i]], "\n")
  cat("Subsample", i, "Variance of targeted productivity:", vars[[i]], "\n")
}
## Subsample 1 Sum of targeted productivity: 436.8 
## Subsample 1 Variance of targeted productivity: 0.008645037 
## Subsample 2 Sum of targeted productivity: 433.44 
## Subsample 2 Variance of targeted productivity: 0.01122367 
## Subsample 3 Sum of targeted productivity: 433.2 
## Subsample 3 Variance of targeted productivity: 0.00988031 
## Subsample 4 Sum of targeted productivity: 431.45 
## Subsample 4 Variance of targeted productivity: 0.01210448 
## Subsample 5 Sum of targeted productivity: 435.19 
## Subsample 5 Variance of targeted productivity: 0.01171031 
## Subsample 6 Sum of targeted productivity: 436.27 
## Subsample 6 Variance of targeted productivity: 0.009756747 
## Subsample 7 Sum of targeted productivity: 432.92 
## Subsample 7 Variance of targeted productivity: 0.01136765 
## Subsample 8 Sum of targeted productivity: 436.7 
## Subsample 8 Variance of targeted productivity: 0.009693731 
## Subsample 9 Sum of targeted productivity: 436.6 
## Subsample 9 Variance of targeted productivity: 0.01030686

Conclusion

Looking at the results, the mean of no_of_workers shows no prominent variation across the subsamples: it stays between 30 and 40. Similarly, the standard deviation changes very little, ranging from 21 to 23. This narrow range and minimal deviation suggest that the no_of_workers data is consistent across the subsamples. For targeted_productivity, the sum ranges from about 431 to 437 in this iteration. The variance ranges from 0.007 to 0.015, which is relatively high variation compared to the other functions. Overall, based on the aggregate functions computed and analysed above, I would say the data is comfortably consistent.