# Load required libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)

# Load the dataset
nba_data <- read.csv("C:/Statistics/nba.csv")

# Set seed for reproducibility
set.seed(42)

# Sampling design: named once instead of being repeated inline.
sample_fraction <- 0.5 # each sample holds 50% as many rows as the full data
n_samples <- 5L # number of independent samples to draw

# Calculate sample size (50% of data)
sample_size <- round(sample_fraction * nrow(nba_data))

# Generate the random samples with replacement. lapply() evaluates the draws
# one after another, so the random-number stream is consumed in the same order
# as five consecutive sample_n() statements would consume it.
samples <- lapply(seq_len(n_samples), function(i) {
  sample_n(nba_data, sample_size, replace = TRUE)
})
names(samples) <- paste0("df_", seq_along(samples))

# Keep the individual data frames available under their established names.
df_1 <- samples[["df_1"]]
df_2 <- samples[["df_2"]]
df_3 <- samples[["df_3"]]
df_4 <- samples[["df_4"]]
df_5 <- samples[["df_5"]]

# Report the row count of every sample to confirm the 50% sampling
cat("Sample Sizes:\n")
## Sample Sizes:
sampled_frames <- list(
  df_1 = df_1, df_2 = df_2, df_3 = df_3, df_4 = df_4, df_5 = df_5
)
for (label in names(sampled_frames)) {
  # Emits "<label>:", the row count and a newline, separated by spaces.
  cat(paste0(label, ":"), nrow(sampled_frames[[label]]), "\n")
}
## df_1: 852
## df_2: 852
## df_3: 852
## df_4: 852
## df_5: 852
## Display Sample Data
# Preview the leading rows of each sample, beginning with sample 1
df_1 %>% head()
##       bbrID       Date  Tm Opp TRB AST STL BLK PTS GmSc  Season Playoffs Year
## 1 gilgesh01 2021-02-24 OKC SAS   8   4   0   1  42 34.3 2020-21    false 2021
## 2 collija04 2012-04-20 ATL BOS   7   4   0   0   8 11.0 2011-12    false 2012
## 3 owensbi01 1997-03-01 SAC MIL  17   9   1   1  31 36.6 1996-97    false 1997
## 4 murrade01 2022-02-11 SAS ATL  10  15   4   0  32 39.5 2021-22    false 2022
## 5 pricebr01 1992-11-12 WSB ORL   5   6   3   0  22 24.3 1992-93    false 1993
## 6 oquinky01 2013-03-27 ORL CHA  11   6   1   0  23 24.9 2012-13    false 2013
##   GameIndex GmScMovingZ GmScMovingZTop2Delta      Date2 GmSc2 GmScMovingZ2
## 1       190        2.17                 0.22 2019-10-25  25.5         1.95
## 2       762        4.85                 1.24 2002-01-05  19.0         3.61
## 3       382        4.89                 1.47 1998-02-04  27.4         3.42
## 4       316        3.01                 0.16 2017-01-19  19.9         2.85
## 5         3        4.79                 1.27 1996-01-15  32.3         3.52
## 6        46        4.15                 1.11 2016-12-02  22.1         3.04
# Leading rows of sample 2
df_2 %>% head()
##       bbrID       Date  Tm Opp TRB AST STL BLK PTS GmSc  Season Playoffs Year
## 1 nowelja01 2021-03-11 MIN NOP   5   6   1   1  28 28.3 2020-21    false 2021
## 2 pargoje01 2012-11-21 CLE PHI   5   4   1   0  28 19.4 2012-13    false 2013
## 3 nocioan01 2010-12-29 PHI PHO  12   2   0   0  22 20.8 2010-11    false 2011
## 4  okurme01 2009-01-12 UTA IND   9   3   1   0  43 38.9 2008-09    false 2009
## 5 vucevni01 2021-02-12 ORL SAC   9   4   2   0  42 39.5 2020-21    false 2021
## 6 gallola01 2019-11-15 DET CHO   2   1   0   0  32 26.5 2019-20    false 2020
##   GameIndex GmScMovingZ GmScMovingZTop2Delta      Date2 GmSc2 GmScMovingZ2
## 1        37        4.13                 0.80 2021-12-27  23.9         3.33
## 2        47        3.80                 0.56 2012-12-08  16.9         3.24
## 3       498        3.42                 0.62 2004-11-19  21.0         2.80
## 4       556        3.97                 0.63 2004-01-14  28.6         3.34
## 5       635        3.07                 0.08 2015-01-23  32.6         2.99
## 6       355        3.79                 0.70 2015-04-13  24.1         3.09
# Leading rows of sample 3
df_3 %>% head()
##       bbrID       Date  Tm Opp TRB AST STL BLK PTS GmSc  Season Playoffs Year
## 1 chiozch01 2021-04-07 BRK NOP   2   8   1   0  12 14.7 2020-21    false 2021
## 2 johnsjo02 2017-04-23 UTA LAC   5   5   1   0  28 25.1 2016-17     true 2017
## 3 priceaj01 2010-04-12 IND ORL   1   4   1   0  19 17.2 2009-10    false 2010
## 4 hardati01 1993-04-25 GSW SEA   7  18   2   1  41 41.2 1992-93    false 1993
## 5 marjabo01 2020-03-11 DAL DEN  17   1   2   0  31 29.4 2019-20    false 2020
## 6 evansje01 2012-04-26 UTA POR  10   1   3   3  13 18.7 2011-12    false 2012
##   GameIndex GmScMovingZ GmScMovingZTop2Delta      Date2 GmSc2 GmScMovingZ2
## 1        58        3.21                 0.67 2020-08-21  12.2         2.54
## 2      1325        3.54                 0.23 2013-12-16  34.7         3.31
## 3        54        3.23                 0.09 2013-01-14  18.8         3.14
## 4       320        2.89                 0.27 1997-03-07  36.5         2.62
## 5       240        4.39                 1.09 2016-04-13  23.2         3.30
## 6        77        3.51                 0.52 2011-03-11  14.1         2.99
# Leading rows of sample 4
df_4 %>% head()
##       bbrID       Date  Tm Opp TRB AST STL BLK PTS GmSc  Season Playoffs Year
## 1 mathega01 2019-12-30 WAS MIA   4   0   0   0  28 22.3 2019-20    false 2020
## 2 bareajo01 2007-11-03 DAL SAC   1   5   1   0  25 20.9 2007-08    false 2008
## 3 mccoyje01 2000-04-19 SEA LAC   8   1   2   3  15 16.7 1999-00    false 2000
## 4 campafa01 2022-01-01 DEN HOU   4  12   5   2  22 28.7 2021-22    false 2022
## 5 webbech01 1994-01-04 GSW SAC  13   4   3   6  36 37.8 1993-94    false 1994
## 6 tabakza01 2001-04-15 IND CHI   6   4   0   1  16 14.0 2000-01    false 2001
##   GameIndex GmScMovingZ GmScMovingZTop2Delta      Date2 GmSc2 GmScMovingZ2
## 1        10        3.61                 0.91 2022-01-19  21.0         2.70
## 2        37        4.02                 0.67 2015-12-23  26.5         3.35
## 3        83        3.54                 0.66 2000-12-13  13.1         2.88
## 4       107        3.85                 1.12 2021-04-28  21.8         2.73
## 5        25        3.28                 0.20 1995-12-27  40.7         3.08
## 6       262        3.22                 0.16 1995-12-22  17.8         3.06
# Leading rows of sample 5
df_5 %>% head()
##       bbrID       Date  Tm Opp TRB AST STL BLK PTS GmSc  Season Playoffs Year
## 1 pachuza01 2015-03-20 MIL BRK  21   7   1   0  22 29.2 2014-15    false 2015
## 2 douglto01 2010-11-04 NYK CHI   1   4   4   0  30 28.3 2010-11    false 2011
## 3 mcdankj01 2017-03-23 BRK PHO   8   1   2   1  16 16.5 2016-17    false 2017
## 4 hardati01 1993-04-25 GSW SEA   7  18   2   1  41 41.2 1992-93    false 1993
## 5 gaffoda01 2022-04-05 WAS MIN  12   3   0   1  24 25.5 2021-22    false 2022
## 6 daniean01 2000-12-26 SAS HOU   2  10   1   0  26 27.9 2000-01    false 2001
##   GameIndex GmScMovingZ GmScMovingZTop2Delta      Date2 GmSc2 GmScMovingZ2
## 1       841        3.63                 0.02 2018-03-11  20.3         3.61
## 2        59        3.18                 0.06 2011-04-05  26.4         3.12
## 3       143        3.95                 1.14 2014-11-29  16.7         2.81
## 4       320        2.89                 0.27 1997-03-07  36.5         2.62
## 5       170        2.85                 0.03 2019-11-18  20.2         2.82
## 6       235        3.77                 0.21 2004-01-31  30.9         3.56
## Summary Statistics
# Per-column summary statistics for each sample, beginning with sample 1
df_1 %>% summary()
##     bbrID               Date                Tm                Opp           
##  Length:852         Length:852         Length:852         Length:852        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       TRB              AST              STL              BLK        
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   :0.0000  
##  1st Qu.: 4.000   1st Qu.: 1.000   1st Qu.: 0.000   1st Qu.:0.0000  
##  Median : 7.000   Median : 3.000   Median : 1.000   Median :0.0000  
##  Mean   : 7.394   Mean   : 3.884   Mean   : 1.621   Mean   :0.8615  
##  3rd Qu.:10.000   3rd Qu.: 6.000   3rd Qu.: 2.000   3rd Qu.:1.0000  
##  Max.   :26.000   Max.   :22.000   Max.   :10.000   Max.   :7.0000  
##       PTS             GmSc          Season            Playoffs        
##  Min.   : 5.00   Min.   : 6.40   Length:852         Length:852        
##  1st Qu.:19.00   1st Qu.:18.88   Class :character   Class :character  
##  Median :25.00   Median :24.30   Mode  :character   Mode  :character  
##  Mean   :25.81   Mean   :24.96                                        
##  3rd Qu.:32.00   3rd Qu.:29.95                                        
##  Max.   :70.00   Max.   :64.60                                        
##       Year        GameIndex       GmScMovingZ   GmScMovingZTop2Delta
##  Min.   :1985   Min.   :   0.0   Min.   :2.17   Min.   :0.0000      
##  1st Qu.:2000   1st Qu.:  67.0   1st Qu.:3.20   1st Qu.:0.1600      
##  Median :2009   Median : 150.5   Median :3.59   Median :0.3400      
##  Mean   :2008   Mean   : 251.3   Mean   :3.65   Mean   :0.4994      
##  3rd Qu.:2017   3rd Qu.: 375.5   3rd Qu.:3.97   3rd Qu.:0.7100      
##  Max.   :2022   Max.   :1592.0   Max.   :6.75   Max.   :3.3600      
##     Date2               GmSc2        GmScMovingZ2  
##  Length:852         Min.   : 6.20   Min.   :1.950  
##  Class :character   1st Qu.:16.90   1st Qu.:2.828  
##  Mode  :character   Median :21.50   Median :3.140  
##                     Mean   :22.65   Mean   :3.151  
##                     3rd Qu.:27.73   3rd Qu.:3.450  
##                     Max.   :51.80   Max.   :5.110
# Per-column summary statistics for sample 2
df_2 %>% summary()
##     bbrID               Date                Tm                Opp           
##  Length:852         Length:852         Length:852         Length:852        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       TRB              AST              STL             BLK        
##  Min.   : 0.000   Min.   : 0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.: 4.000   1st Qu.: 1.000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median : 7.000   Median : 3.000   Median :1.000   Median :0.0000  
##  Mean   : 7.549   Mean   : 3.707   Mean   :1.638   Mean   :0.9237  
##  3rd Qu.:10.000   3rd Qu.: 5.000   3rd Qu.:2.250   3rd Qu.:1.0000  
##  Max.   :26.000   Max.   :22.000   Max.   :7.000   Max.   :9.0000  
##       PTS             GmSc          Season            Playoffs        
##  Min.   : 5.00   Min.   : 6.40   Length:852         Length:852        
##  1st Qu.:20.00   1st Qu.:19.70   Class :character   Class :character  
##  Median :25.00   Median :24.80   Mode  :character   Mode  :character  
##  Mean   :26.92   Mean   :25.73                                        
##  3rd Qu.:33.00   3rd Qu.:30.90                                        
##  Max.   :70.00   Max.   :54.50                                        
##       Year        GameIndex        GmScMovingZ    GmScMovingZTop2Delta
##  Min.   :1985   Min.   :   0.00   Min.   :2.170   Min.   :0.0000      
##  1st Qu.:1998   1st Qu.:  74.75   1st Qu.:3.220   1st Qu.:0.1300      
##  Median :2008   Median : 154.00   Median :3.600   Median :0.3400      
##  Mean   :2007   Mean   : 259.23   Mean   :3.654   Mean   :0.5001      
##  3rd Qu.:2016   3rd Qu.: 381.00   3rd Qu.:4.010   3rd Qu.:0.7200      
##  Max.   :2022   Max.   :1427.00   Max.   :6.750   Max.   :3.3600      
##     Date2               GmSc2        GmScMovingZ2  
##  Length:852         Min.   : 6.20   Min.   :1.840  
##  Class :character   1st Qu.:17.30   1st Qu.:2.857  
##  Mode  :character   Median :22.05   Median :3.140  
##                     Mean   :23.00   Mean   :3.154  
##                     3rd Qu.:28.00   3rd Qu.:3.430  
##                     Max.   :51.80   Max.   :4.710
# Per-column summary statistics for sample 3
df_3 %>% summary()
##     bbrID               Date                Tm                Opp           
##  Length:852         Length:852         Length:852         Length:852        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       TRB             AST              STL             BLK       
##  Min.   : 0.00   Min.   : 0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.: 4.00   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:0.000  
##  Median : 7.00   Median : 3.000   Median :1.000   Median :0.000  
##  Mean   : 7.54   Mean   : 3.805   Mean   :1.566   Mean   :0.838  
##  3rd Qu.:10.00   3rd Qu.: 5.000   3rd Qu.:2.000   3rd Qu.:1.000  
##  Max.   :29.00   Max.   :19.000   Max.   :6.000   Max.   :8.000  
##       PTS             GmSc          Season            Playoffs        
##  Min.   : 4.00   Min.   : 9.60   Length:852         Length:852        
##  1st Qu.:19.00   1st Qu.:18.80   Class :character   Class :character  
##  Median :24.00   Median :24.00   Mode  :character   Mode  :character  
##  Mean   :25.98   Mean   :25.01                                        
##  3rd Qu.:32.00   3rd Qu.:30.00                                        
##  Max.   :70.00   Max.   :54.50                                        
##       Year        GameIndex       GmScMovingZ    GmScMovingZTop2Delta
##  Min.   :1985   Min.   :   0.0   Min.   :2.170   Min.   :0.0000      
##  1st Qu.:1998   1st Qu.:  66.0   1st Qu.:3.230   1st Qu.:0.1500      
##  Median :2009   Median : 149.0   Median :3.640   Median :0.3500      
##  Mean   :2007   Mean   : 255.1   Mean   :3.698   Mean   :0.5196      
##  3rd Qu.:2017   3rd Qu.: 359.8   3rd Qu.:4.080   3rd Qu.:0.7225      
##  Max.   :2022   Max.   :1427.0   Max.   :6.750   Max.   :3.5600      
##     Date2               GmSc2        GmScMovingZ2  
##  Length:852         Min.   : 5.30   Min.   :1.880  
##  Class :character   1st Qu.:17.20   1st Qu.:2.850  
##  Mode  :character   Median :21.45   Median :3.150  
##                     Mean   :22.69   Mean   :3.178  
##                     3rd Qu.:27.30   3rd Qu.:3.470  
##                     Max.   :51.80   Max.   :4.910
# Per-column summary statistics for sample 4
df_4 %>% summary()
##     bbrID               Date                Tm                Opp           
##  Length:852         Length:852         Length:852         Length:852        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       TRB              AST              STL            BLK        
##  Min.   : 0.000   Min.   : 0.000   Min.   :0.00   Min.   :0.0000  
##  1st Qu.: 4.000   1st Qu.: 1.000   1st Qu.:1.00   1st Qu.:0.0000  
##  Median : 7.000   Median : 3.000   Median :1.00   Median :0.0000  
##  Mean   : 7.709   Mean   : 3.627   Mean   :1.62   Mean   :0.9284  
##  3rd Qu.:10.250   3rd Qu.: 5.000   3rd Qu.:2.00   3rd Qu.:1.0000  
##  Max.   :29.000   Max.   :22.000   Max.   :7.00   Max.   :9.0000  
##       PTS             GmSc          Season            Playoffs        
##  Min.   : 4.00   Min.   : 6.40   Length:852         Length:852        
##  1st Qu.:19.00   1st Qu.:19.38   Class :character   Class :character  
##  Median :25.00   Median :24.20   Mode  :character   Mode  :character  
##  Mean   :26.49   Mean   :25.50                                        
##  3rd Qu.:32.00   3rd Qu.:29.90                                        
##  Max.   :69.00   Max.   :64.60                                        
##       Year        GameIndex       GmScMovingZ    GmScMovingZTop2Delta
##  Min.   :1985   Min.   :   2.0   Min.   :2.170   Min.   :0.0000      
##  1st Qu.:1998   1st Qu.:  76.0   1st Qu.:3.270   1st Qu.:0.1675      
##  Median :2008   Median : 155.0   Median :3.670   Median :0.3750      
##  Mean   :2007   Mean   : 244.4   Mean   :3.712   Mean   :0.5077      
##  3rd Qu.:2017   3rd Qu.: 356.0   3rd Qu.:4.060   3rd Qu.:0.7000      
##  Max.   :2022   Max.   :1592.0   Max.   :6.220   Max.   :3.7300      
##     Date2               GmSc2        GmScMovingZ2  
##  Length:852         Min.   : 5.30   Min.   :1.880  
##  Class :character   1st Qu.:17.07   1st Qu.:2.880  
##  Mode  :character   Median :21.40   Median :3.200  
##                     Mean   :23.17   Mean   :3.204  
##                     3rd Qu.:28.02   3rd Qu.:3.502  
##                     Max.   :53.80   Max.   :4.600
# Per-column summary statistics for sample 5
df_5 %>% summary()
##     bbrID               Date                Tm                Opp           
##  Length:852         Length:852         Length:852         Length:852        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       TRB              AST              STL             BLK        
##  Min.   : 0.000   Min.   : 0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.: 4.000   1st Qu.: 1.000   1st Qu.:1.000   1st Qu.:0.0000  
##  Median : 7.000   Median : 3.000   Median :1.000   Median :0.0000  
##  Mean   : 7.528   Mean   : 3.641   Mean   :1.641   Mean   :0.9566  
##  3rd Qu.:10.000   3rd Qu.: 5.000   3rd Qu.:2.000   3rd Qu.:1.0000  
##  Max.   :29.000   Max.   :18.000   Max.   :7.000   Max.   :9.0000  
##       PTS             GmSc          Season            Playoffs        
##  Min.   : 4.00   Min.   : 9.60   Length:852         Length:852        
##  1st Qu.:19.00   1st Qu.:19.07   Class :character   Class :character  
##  Median :24.00   Median :24.30   Mode  :character   Mode  :character  
##  Mean   :25.51   Mean   :24.80                                        
##  3rd Qu.:31.00   3rd Qu.:29.20                                        
##  Max.   :70.00   Max.   :54.50                                        
##       Year        GameIndex       GmScMovingZ    GmScMovingZTop2Delta
##  Min.   :1985   Min.   :   0.0   Min.   :2.170   Min.   :0.000       
##  1st Qu.:1998   1st Qu.:  70.0   1st Qu.:3.208   1st Qu.:0.130       
##  Median :2008   Median : 138.0   Median :3.610   Median :0.330       
##  Mean   :2007   Mean   : 236.1   Mean   :3.660   Mean   :0.477       
##  3rd Qu.:2018   3rd Qu.: 320.2   3rd Qu.:4.030   3rd Qu.:0.670       
##  Max.   :2022   Max.   :1592.0   Max.   :6.750   Max.   :3.360       
##     Date2               GmSc2        GmScMovingZ2  
##  Length:852         Min.   : 7.30   Min.   :1.840  
##  Class :character   1st Qu.:16.80   1st Qu.:2.830  
##  Mode  :character   Median :21.30   Median :3.170  
##                     Mean   :22.32   Mean   :3.183  
##                     3rd Qu.:26.23   3rd Qu.:3.520  
##                     Max.   :53.80   Max.   :5.110
## Subsample Analysis
# Compare distributions across subsamples
cat("Distribution of Teams in Each Sample:\n")
## Distribution of Teams in Each Sample:
# Print one team-frequency table per sample. purrr::iwalk() is called for its
# side effects and returns its input invisibly, so every table is printed
# exactly once. purrr::imap() would instead return the list of tables, which
# the top level then auto-prints a second time under [[1]] ... [[5]].
purrr::iwalk(
  list(df_1, df_2, df_3, df_4, df_5),
  function(sample_df, sample_idx) {
    # The list is unnamed, so the second argument is the element position.
    team_counts <- sample_df %>%
      group_by(Tm) %>%
      summarise(count = n())
    cat(paste0("Sample df_", sample_idx, ":\n"))
    print(team_counts)
  }
)
## Sample df_1:
## # A tibble: 38 × 2
##    Tm    count
##    <chr> <int>
##  1 ATL      30
##  2 BOS      32
##  3 BRK       9
##  4 CHA       7
##  5 CHH       3
##  6 CHI      40
##  7 CHO       6
##  8 CLE      39
##  9 DAL      23
## 10 DEN      31
## # ℹ 28 more rows
## Sample df_2:
## # A tibble: 38 × 2
##    Tm    count
##    <chr> <int>
##  1 ATL      23
##  2 BOS      26
##  3 BRK      16
##  4 CHA       7
##  5 CHH       7
##  6 CHI      14
##  7 CHO       9
##  8 CLE      35
##  9 DAL      34
## 10 DEN      24
## # ℹ 28 more rows
## Sample df_3:
## # A tibble: 38 × 2
##    Tm    count
##    <chr> <int>
##  1 ATL      28
##  2 BOS      36
##  3 BRK      24
##  4 CHA       8
##  5 CHH       9
##  6 CHI      27
##  7 CHO       8
##  8 CLE      29
##  9 DAL      33
## 10 DEN      26
## # ℹ 28 more rows
## Sample df_4:
## # A tibble: 38 × 2
##    Tm    count
##    <chr> <int>
##  1 ATL      26
##  2 BOS      38
##  3 BRK      14
##  4 CHA      13
##  5 CHH       6
##  6 CHI      20
##  7 CHO       7
##  8 CLE      33
##  9 DAL      26
## 10 DEN      31
## # ℹ 28 more rows
## Sample df_5:
## # A tibble: 38 × 2
##    Tm    count
##    <chr> <int>
##  1 ATL      35
##  2 BOS      24
##  3 BRK      11
##  4 CHA       7
##  5 CHH      15
##  6 CHI      22
##  7 CHO      11
##  8 CLE      38
##  9 DAL      18
## 10 DEN      35
## # ℹ 28 more rows
# Stack the five samples into one long table for outlier inspection.
# bind_rows(.id = ) records the list name of each source frame in `Sample`;
# relocate() then moves that column to the end of the table.
merged_data <- list(
  df_1 = df_1, df_2 = df_2, df_3 = df_3, df_4 = df_4, df_5 = df_5
) %>%
  bind_rows(.id = "Sample") %>%
  relocate(Sample, .after = last_col())

# One box per sample on a shared axis, so unusual PTS values stand out
ggplot(data = merged_data, mapping = aes(x = Sample, y = PTS)) +
  geom_boxplot() +
  labs(
    title = "Points Distribution Across Samples",
    x = "Sample",
    y = "Points"
  ) +
  theme_minimal()

#' Summarise one sample with the metrics compared across subsamples
#'
#' @param df A data frame containing the columns `PTS`, `AST` and `Tm`.
#' @return A one-row data frame with `avg_pts` (mean of `PTS`), `avg_ast`
#'   (mean of `AST`), both ignoring `NA`, and `unique_teams` (number of
#'   distinct values in `Tm`).
common_summary <- function(df) {
  summarise(
    df,
    avg_pts = mean(PTS, na.rm = TRUE),
    avg_ast = mean(AST, na.rm = TRUE),
    unique_teams = n_distinct(Tm)
  )
}

# Apply the shared summary helper to each sample in turn
df_1 %>% common_summary()
##    avg_pts  avg_ast unique_teams
## 1 25.81455 3.883803           38
df_2 %>% common_summary()
##    avg_pts  avg_ast unique_teams
## 1 26.91667 3.706573           38
df_3 %>% common_summary()
##    avg_pts  avg_ast unique_teams
## 1 25.98474 3.805164           38
df_4 %>% common_summary()
##    avg_pts  avg_ast unique_teams
## 1 26.49178 3.626761           38
df_5 %>% common_summary()
##    avg_pts  avg_ast unique_teams
## 1 25.50587 3.640845           38
# Interpretation of Summary Statistics
cat("\nSummary Interpretation:\n")
## 
## Summary Interpretation:
cat("The average points (avg_pts) across samples are quite consistent, suggesting stability in player performance metrics.\n")
## The average points (avg_pts) across samples are quite consistent, suggesting stability in player performance metrics.
cat("Assist averages (avg_ast) also show minimal variation, indicating a uniform playmaking contribution across samples.\n")
## Assist averages (avg_ast) also show minimal variation, indicating a uniform playmaking contribution across samples.
# unique_teams only counts how many distinct teams occur in a sample. It says
# nothing about how evenly the rows are spread over those teams, so the message
# must not claim balance or absence of bias on that basis.
cat("The unique_teams metric shows that every sample covers the same number of distinct teams; per-team counts still vary between samples, so this alone does not rule out sampling bias.\n")
## The unique_teams metric shows that every sample covers the same number of distinct teams; per-team counts still vary between samples, so this alone does not rule out sampling bias.
# Collect the per-sample summaries into one table, tagging each row with the
# name of the sample it came from (added as the last column).
summary_inputs <- list(
  df_1 = df_1, df_2 = df_2, df_3 = df_3, df_4 = df_4, df_5 = df_5
)
common_summaries <- bind_rows(
  lapply(names(summary_inputs), function(label) {
    mutate(common_summary(summary_inputs[[label]]), Sample = label)
  })
)

# Reshape to one row per (sample, metric) pair so the metric can drive `fill`
common_summaries_long <- pivot_longer(
  common_summaries,
  cols = c(avg_pts, avg_ast, unique_teams),
  names_to = "Metric",
  values_to = "Value"
)

# Side-by-side bars for avg_pts, avg_ast and unique_teams within each sample;
# geom_col() draws bar heights directly from the Value column.
ggplot(
  data = common_summaries_long,
  mapping = aes(x = Sample, y = Value, fill = Metric)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Comparison of Key Metrics Across Samples",
    x = "Sample",
    y = "Value"
  ) +
  theme_minimal()