# Load required libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
# Load the dataset
# NOTE(review): absolute, machine-specific path - the script only runs where
# the file exists at exactly this location.
nba_data <- read.csv("C:/Statistics/nba.csv")
# Set seed for reproducibility
set.seed(42)
# Calculate sample size (50% of the rows, rounded to a whole number)
sample_size <- round(0.5 * nrow(nba_data))
# Generate 5 random samples with replacement.
# slice_sample() is dplyr's replacement for the superseded sample_n().
# The five draws stay in this order so the seed is consumed in the same sequence.
df_1 <- nba_data %>% slice_sample(n = sample_size, replace = TRUE)
df_2 <- nba_data %>% slice_sample(n = sample_size, replace = TRUE)
df_3 <- nba_data %>% slice_sample(n = sample_size, replace = TRUE)
df_4 <- nba_data %>% slice_sample(n = sample_size, replace = TRUE)
df_5 <- nba_data %>% slice_sample(n = sample_size, replace = TRUE)
# Report the number of rows in every sample; each should equal sample_size
# (50% of the rows in nba_data). Lines starting with "##" are recorded output.
cat("Sample Sizes:\n", sep = " ")
## Sample Sizes:
cat("df_1:", nrow(df_1), "\n", sep = " ")
## df_1: 852
cat("df_2:", nrow(df_2), "\n", sep = " ")
## df_2: 852
cat("df_3:", nrow(df_3), "\n", sep = " ")
## df_3: 852
cat("df_4:", nrow(df_4), "\n", sep = " ")
## df_4: 852
cat("df_5:", nrow(df_5), "\n", sep = " ")
## df_5: 852
## Display Sample Data
# Preview the first six rows of each sample, starting with df_1
head(df_1, n = 6L)
## bbrID Date Tm Opp TRB AST STL BLK PTS GmSc Season Playoffs Year
## 1 gilgesh01 2021-02-24 OKC SAS 8 4 0 1 42 34.3 2020-21 false 2021
## 2 collija04 2012-04-20 ATL BOS 7 4 0 0 8 11.0 2011-12 false 2012
## 3 owensbi01 1997-03-01 SAC MIL 17 9 1 1 31 36.6 1996-97 false 1997
## 4 murrade01 2022-02-11 SAS ATL 10 15 4 0 32 39.5 2021-22 false 2022
## 5 pricebr01 1992-11-12 WSB ORL 5 6 3 0 22 24.3 1992-93 false 1993
## 6 oquinky01 2013-03-27 ORL CHA 11 6 1 0 23 24.9 2012-13 false 2013
## GameIndex GmScMovingZ GmScMovingZTop2Delta Date2 GmSc2 GmScMovingZ2
## 1 190 2.17 0.22 2019-10-25 25.5 1.95
## 2 762 4.85 1.24 2002-01-05 19.0 3.61
## 3 382 4.89 1.47 1998-02-04 27.4 3.42
## 4 316 3.01 0.16 2017-01-19 19.9 2.85
## 5 3 4.79 1.27 1996-01-15 32.3 3.52
## 6 46 4.15 1.11 2016-12-02 22.1 3.04
# First six rows of df_2
head(df_2, n = 6L)
## bbrID Date Tm Opp TRB AST STL BLK PTS GmSc Season Playoffs Year
## 1 nowelja01 2021-03-11 MIN NOP 5 6 1 1 28 28.3 2020-21 false 2021
## 2 pargoje01 2012-11-21 CLE PHI 5 4 1 0 28 19.4 2012-13 false 2013
## 3 nocioan01 2010-12-29 PHI PHO 12 2 0 0 22 20.8 2010-11 false 2011
## 4 okurme01 2009-01-12 UTA IND 9 3 1 0 43 38.9 2008-09 false 2009
## 5 vucevni01 2021-02-12 ORL SAC 9 4 2 0 42 39.5 2020-21 false 2021
## 6 gallola01 2019-11-15 DET CHO 2 1 0 0 32 26.5 2019-20 false 2020
## GameIndex GmScMovingZ GmScMovingZTop2Delta Date2 GmSc2 GmScMovingZ2
## 1 37 4.13 0.80 2021-12-27 23.9 3.33
## 2 47 3.80 0.56 2012-12-08 16.9 3.24
## 3 498 3.42 0.62 2004-11-19 21.0 2.80
## 4 556 3.97 0.63 2004-01-14 28.6 3.34
## 5 635 3.07 0.08 2015-01-23 32.6 2.99
## 6 355 3.79 0.70 2015-04-13 24.1 3.09
# First six rows of df_3
head(df_3, n = 6L)
## bbrID Date Tm Opp TRB AST STL BLK PTS GmSc Season Playoffs Year
## 1 chiozch01 2021-04-07 BRK NOP 2 8 1 0 12 14.7 2020-21 false 2021
## 2 johnsjo02 2017-04-23 UTA LAC 5 5 1 0 28 25.1 2016-17 true 2017
## 3 priceaj01 2010-04-12 IND ORL 1 4 1 0 19 17.2 2009-10 false 2010
## 4 hardati01 1993-04-25 GSW SEA 7 18 2 1 41 41.2 1992-93 false 1993
## 5 marjabo01 2020-03-11 DAL DEN 17 1 2 0 31 29.4 2019-20 false 2020
## 6 evansje01 2012-04-26 UTA POR 10 1 3 3 13 18.7 2011-12 false 2012
## GameIndex GmScMovingZ GmScMovingZTop2Delta Date2 GmSc2 GmScMovingZ2
## 1 58 3.21 0.67 2020-08-21 12.2 2.54
## 2 1325 3.54 0.23 2013-12-16 34.7 3.31
## 3 54 3.23 0.09 2013-01-14 18.8 3.14
## 4 320 2.89 0.27 1997-03-07 36.5 2.62
## 5 240 4.39 1.09 2016-04-13 23.2 3.30
## 6 77 3.51 0.52 2011-03-11 14.1 2.99
# First six rows of df_4
head(df_4, n = 6L)
## bbrID Date Tm Opp TRB AST STL BLK PTS GmSc Season Playoffs Year
## 1 mathega01 2019-12-30 WAS MIA 4 0 0 0 28 22.3 2019-20 false 2020
## 2 bareajo01 2007-11-03 DAL SAC 1 5 1 0 25 20.9 2007-08 false 2008
## 3 mccoyje01 2000-04-19 SEA LAC 8 1 2 3 15 16.7 1999-00 false 2000
## 4 campafa01 2022-01-01 DEN HOU 4 12 5 2 22 28.7 2021-22 false 2022
## 5 webbech01 1994-01-04 GSW SAC 13 4 3 6 36 37.8 1993-94 false 1994
## 6 tabakza01 2001-04-15 IND CHI 6 4 0 1 16 14.0 2000-01 false 2001
## GameIndex GmScMovingZ GmScMovingZTop2Delta Date2 GmSc2 GmScMovingZ2
## 1 10 3.61 0.91 2022-01-19 21.0 2.70
## 2 37 4.02 0.67 2015-12-23 26.5 3.35
## 3 83 3.54 0.66 2000-12-13 13.1 2.88
## 4 107 3.85 1.12 2021-04-28 21.8 2.73
## 5 25 3.28 0.20 1995-12-27 40.7 3.08
## 6 262 3.22 0.16 1995-12-22 17.8 3.06
# First six rows of df_5
head(df_5, n = 6L)
## bbrID Date Tm Opp TRB AST STL BLK PTS GmSc Season Playoffs Year
## 1 pachuza01 2015-03-20 MIL BRK 21 7 1 0 22 29.2 2014-15 false 2015
## 2 douglto01 2010-11-04 NYK CHI 1 4 4 0 30 28.3 2010-11 false 2011
## 3 mcdankj01 2017-03-23 BRK PHO 8 1 2 1 16 16.5 2016-17 false 2017
## 4 hardati01 1993-04-25 GSW SEA 7 18 2 1 41 41.2 1992-93 false 1993
## 5 gaffoda01 2022-04-05 WAS MIN 12 3 0 1 24 25.5 2021-22 false 2022
## 6 daniean01 2000-12-26 SAS HOU 2 10 1 0 26 27.9 2000-01 false 2001
## GameIndex GmScMovingZ GmScMovingZTop2Delta Date2 GmSc2 GmScMovingZ2
## 1 841 3.63 0.02 2018-03-11 20.3 3.61
## 2 59 3.18 0.06 2011-04-05 26.4 3.12
## 3 143 3.95 1.14 2014-11-29 16.7 2.81
## 4 320 2.89 0.27 1997-03-07 36.5 2.62
## 5 170 2.85 0.03 2019-11-18 20.2 2.82
## 6 235 3.77 0.21 2004-01-31 30.9 3.56
## Summary Statistics
# Column-wise summary statistics for each sample, starting with df_1
df_1 %>% summary()
## bbrID Date Tm Opp
## Length:852 Length:852 Length:852 Length:852
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TRB AST STL BLK
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. :0.0000
## 1st Qu.: 4.000 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.:0.0000
## Median : 7.000 Median : 3.000 Median : 1.000 Median :0.0000
## Mean : 7.394 Mean : 3.884 Mean : 1.621 Mean :0.8615
## 3rd Qu.:10.000 3rd Qu.: 6.000 3rd Qu.: 2.000 3rd Qu.:1.0000
## Max. :26.000 Max. :22.000 Max. :10.000 Max. :7.0000
## PTS GmSc Season Playoffs
## Min. : 5.00 Min. : 6.40 Length:852 Length:852
## 1st Qu.:19.00 1st Qu.:18.88 Class :character Class :character
## Median :25.00 Median :24.30 Mode :character Mode :character
## Mean :25.81 Mean :24.96
## 3rd Qu.:32.00 3rd Qu.:29.95
## Max. :70.00 Max. :64.60
## Year GameIndex GmScMovingZ GmScMovingZTop2Delta
## Min. :1985 Min. : 0.0 Min. :2.17 Min. :0.0000
## 1st Qu.:2000 1st Qu.: 67.0 1st Qu.:3.20 1st Qu.:0.1600
## Median :2009 Median : 150.5 Median :3.59 Median :0.3400
## Mean :2008 Mean : 251.3 Mean :3.65 Mean :0.4994
## 3rd Qu.:2017 3rd Qu.: 375.5 3rd Qu.:3.97 3rd Qu.:0.7100
## Max. :2022 Max. :1592.0 Max. :6.75 Max. :3.3600
## Date2 GmSc2 GmScMovingZ2
## Length:852 Min. : 6.20 Min. :1.950
## Class :character 1st Qu.:16.90 1st Qu.:2.828
## Mode :character Median :21.50 Median :3.140
## Mean :22.65 Mean :3.151
## 3rd Qu.:27.73 3rd Qu.:3.450
## Max. :51.80 Max. :5.110
# Column-wise summary statistics for df_2
df_2 %>% summary()
## bbrID Date Tm Opp
## Length:852 Length:852 Length:852 Length:852
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TRB AST STL BLK
## Min. : 0.000 Min. : 0.000 Min. :0.000 Min. :0.0000
## 1st Qu.: 4.000 1st Qu.: 1.000 1st Qu.:0.000 1st Qu.:0.0000
## Median : 7.000 Median : 3.000 Median :1.000 Median :0.0000
## Mean : 7.549 Mean : 3.707 Mean :1.638 Mean :0.9237
## 3rd Qu.:10.000 3rd Qu.: 5.000 3rd Qu.:2.250 3rd Qu.:1.0000
## Max. :26.000 Max. :22.000 Max. :7.000 Max. :9.0000
## PTS GmSc Season Playoffs
## Min. : 5.00 Min. : 6.40 Length:852 Length:852
## 1st Qu.:20.00 1st Qu.:19.70 Class :character Class :character
## Median :25.00 Median :24.80 Mode :character Mode :character
## Mean :26.92 Mean :25.73
## 3rd Qu.:33.00 3rd Qu.:30.90
## Max. :70.00 Max. :54.50
## Year GameIndex GmScMovingZ GmScMovingZTop2Delta
## Min. :1985 Min. : 0.00 Min. :2.170 Min. :0.0000
## 1st Qu.:1998 1st Qu.: 74.75 1st Qu.:3.220 1st Qu.:0.1300
## Median :2008 Median : 154.00 Median :3.600 Median :0.3400
## Mean :2007 Mean : 259.23 Mean :3.654 Mean :0.5001
## 3rd Qu.:2016 3rd Qu.: 381.00 3rd Qu.:4.010 3rd Qu.:0.7200
## Max. :2022 Max. :1427.00 Max. :6.750 Max. :3.3600
## Date2 GmSc2 GmScMovingZ2
## Length:852 Min. : 6.20 Min. :1.840
## Class :character 1st Qu.:17.30 1st Qu.:2.857
## Mode :character Median :22.05 Median :3.140
## Mean :23.00 Mean :3.154
## 3rd Qu.:28.00 3rd Qu.:3.430
## Max. :51.80 Max. :4.710
# Column-wise summary statistics for df_3
df_3 %>% summary()
## bbrID Date Tm Opp
## Length:852 Length:852 Length:852 Length:852
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TRB AST STL BLK
## Min. : 0.00 Min. : 0.000 Min. :0.000 Min. :0.000
## 1st Qu.: 4.00 1st Qu.: 2.000 1st Qu.:1.000 1st Qu.:0.000
## Median : 7.00 Median : 3.000 Median :1.000 Median :0.000
## Mean : 7.54 Mean : 3.805 Mean :1.566 Mean :0.838
## 3rd Qu.:10.00 3rd Qu.: 5.000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :29.00 Max. :19.000 Max. :6.000 Max. :8.000
## PTS GmSc Season Playoffs
## Min. : 4.00 Min. : 9.60 Length:852 Length:852
## 1st Qu.:19.00 1st Qu.:18.80 Class :character Class :character
## Median :24.00 Median :24.00 Mode :character Mode :character
## Mean :25.98 Mean :25.01
## 3rd Qu.:32.00 3rd Qu.:30.00
## Max. :70.00 Max. :54.50
## Year GameIndex GmScMovingZ GmScMovingZTop2Delta
## Min. :1985 Min. : 0.0 Min. :2.170 Min. :0.0000
## 1st Qu.:1998 1st Qu.: 66.0 1st Qu.:3.230 1st Qu.:0.1500
## Median :2009 Median : 149.0 Median :3.640 Median :0.3500
## Mean :2007 Mean : 255.1 Mean :3.698 Mean :0.5196
## 3rd Qu.:2017 3rd Qu.: 359.8 3rd Qu.:4.080 3rd Qu.:0.7225
## Max. :2022 Max. :1427.0 Max. :6.750 Max. :3.5600
## Date2 GmSc2 GmScMovingZ2
## Length:852 Min. : 5.30 Min. :1.880
## Class :character 1st Qu.:17.20 1st Qu.:2.850
## Mode :character Median :21.45 Median :3.150
## Mean :22.69 Mean :3.178
## 3rd Qu.:27.30 3rd Qu.:3.470
## Max. :51.80 Max. :4.910
# Column-wise summary statistics for df_4
df_4 %>% summary()
## bbrID Date Tm Opp
## Length:852 Length:852 Length:852 Length:852
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TRB AST STL BLK
## Min. : 0.000 Min. : 0.000 Min. :0.00 Min. :0.0000
## 1st Qu.: 4.000 1st Qu.: 1.000 1st Qu.:1.00 1st Qu.:0.0000
## Median : 7.000 Median : 3.000 Median :1.00 Median :0.0000
## Mean : 7.709 Mean : 3.627 Mean :1.62 Mean :0.9284
## 3rd Qu.:10.250 3rd Qu.: 5.000 3rd Qu.:2.00 3rd Qu.:1.0000
## Max. :29.000 Max. :22.000 Max. :7.00 Max. :9.0000
## PTS GmSc Season Playoffs
## Min. : 4.00 Min. : 6.40 Length:852 Length:852
## 1st Qu.:19.00 1st Qu.:19.38 Class :character Class :character
## Median :25.00 Median :24.20 Mode :character Mode :character
## Mean :26.49 Mean :25.50
## 3rd Qu.:32.00 3rd Qu.:29.90
## Max. :69.00 Max. :64.60
## Year GameIndex GmScMovingZ GmScMovingZTop2Delta
## Min. :1985 Min. : 2.0 Min. :2.170 Min. :0.0000
## 1st Qu.:1998 1st Qu.: 76.0 1st Qu.:3.270 1st Qu.:0.1675
## Median :2008 Median : 155.0 Median :3.670 Median :0.3750
## Mean :2007 Mean : 244.4 Mean :3.712 Mean :0.5077
## 3rd Qu.:2017 3rd Qu.: 356.0 3rd Qu.:4.060 3rd Qu.:0.7000
## Max. :2022 Max. :1592.0 Max. :6.220 Max. :3.7300
## Date2 GmSc2 GmScMovingZ2
## Length:852 Min. : 5.30 Min. :1.880
## Class :character 1st Qu.:17.07 1st Qu.:2.880
## Mode :character Median :21.40 Median :3.200
## Mean :23.17 Mean :3.204
## 3rd Qu.:28.02 3rd Qu.:3.502
## Max. :53.80 Max. :4.600
# Column-wise summary statistics for df_5
df_5 %>% summary()
## bbrID Date Tm Opp
## Length:852 Length:852 Length:852 Length:852
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TRB AST STL BLK
## Min. : 0.000 Min. : 0.000 Min. :0.000 Min. :0.0000
## 1st Qu.: 4.000 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.:0.0000
## Median : 7.000 Median : 3.000 Median :1.000 Median :0.0000
## Mean : 7.528 Mean : 3.641 Mean :1.641 Mean :0.9566
## 3rd Qu.:10.000 3rd Qu.: 5.000 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :29.000 Max. :18.000 Max. :7.000 Max. :9.0000
## PTS GmSc Season Playoffs
## Min. : 4.00 Min. : 9.60 Length:852 Length:852
## 1st Qu.:19.00 1st Qu.:19.07 Class :character Class :character
## Median :24.00 Median :24.30 Mode :character Mode :character
## Mean :25.51 Mean :24.80
## 3rd Qu.:31.00 3rd Qu.:29.20
## Max. :70.00 Max. :54.50
## Year GameIndex GmScMovingZ GmScMovingZTop2Delta
## Min. :1985 Min. : 0.0 Min. :2.170 Min. :0.000
## 1st Qu.:1998 1st Qu.: 70.0 1st Qu.:3.208 1st Qu.:0.130
## Median :2008 Median : 138.0 Median :3.610 Median :0.330
## Mean :2007 Mean : 236.1 Mean :3.660 Mean :0.477
## 3rd Qu.:2018 3rd Qu.: 320.2 3rd Qu.:4.030 3rd Qu.:0.670
## Max. :2022 Max. :1592.0 Max. :6.750 Max. :3.360
## Date2 GmSc2 GmScMovingZ2
## Length:852 Min. : 7.30 Min. :1.840
## Class :character 1st Qu.:16.80 1st Qu.:2.830
## Mode :character Median :21.30 Median :3.170
## Mean :22.32 Mean :3.183
## 3rd Qu.:26.23 3rd Qu.:3.520
## Max. :53.80 Max. :5.110
## Subsample Analysis
# Compare distributions across subsamples
cat("Distribution of Teams in Each Sample:\n")
## Distribution of Teams in Each Sample:
# Print the number of rows per team (Tm) for every sample.
# iwalk() is used because the iteration exists only for its printing side
# effect: imap() returned the list of count tables, which was then
# auto-printed a second time after the labelled output.
purrr::iwalk(
  list(df_1, df_2, df_3, df_4, df_5),
  function(sample_df, sample_idx) {
    # One row per team with the number of sampled rows for that team
    team_counts <- sample_df %>%
      group_by(Tm) %>%
      summarise(count = n())
    # The list is unnamed, so sample_idx is the position (1..5) of the sample
    cat(paste0("Sample df_", sample_idx, ":\n"))
    print(team_counts)
  }
)
## Sample df_1:
## # A tibble: 38 × 2
## Tm count
## <chr> <int>
## 1 ATL 30
## 2 BOS 32
## 3 BRK 9
## 4 CHA 7
## 5 CHH 3
## 6 CHI 40
## 7 CHO 6
## 8 CLE 39
## 9 DAL 23
## 10 DEN 31
## # ℹ 28 more rows
## Sample df_2:
## # A tibble: 38 × 2
## Tm count
## <chr> <int>
## 1 ATL 23
## 2 BOS 26
## 3 BRK 16
## 4 CHA 7
## 5 CHH 7
## 6 CHI 14
## 7 CHO 9
## 8 CLE 35
## 9 DAL 34
## 10 DEN 24
## # ℹ 28 more rows
## Sample df_3:
## # A tibble: 38 × 2
## Tm count
## <chr> <int>
## 1 ATL 28
## 2 BOS 36
## 3 BRK 24
## 4 CHA 8
## 5 CHH 9
## 6 CHI 27
## 7 CHO 8
## 8 CLE 29
## 9 DAL 33
## 10 DEN 26
## # ℹ 28 more rows
## Sample df_4:
## # A tibble: 38 × 2
## Tm count
## <chr> <int>
## 1 ATL 26
## 2 BOS 38
## 3 BRK 14
## 4 CHA 13
## 5 CHH 6
## 6 CHI 20
## 7 CHO 7
## 8 CLE 33
## 9 DAL 26
## 10 DEN 31
## # ℹ 28 more rows
## Sample df_5:
## # A tibble: 38 × 2
## Tm count
## <chr> <int>
## 1 ATL 35
## 2 BOS 24
## 3 BRK 11
## 4 CHA 7
## 5 CHH 15
## 6 CHI 22
## 7 CHO 11
## 8 CLE 38
## 9 DAL 18
## 10 DEN 35
## # ℹ 28 more rows
# Stack the five samples into one table, tagging every row with the name of
# the sample it came from so the samples can be compared in a single plot.
merged_data <- bind_rows(
  mutate(df_1, Sample = "df_1"),
  mutate(df_2, Sample = "df_2"),
  mutate(df_3, Sample = "df_3"),
  mutate(df_4, Sample = "df_4"),
  mutate(df_5, Sample = "df_5")
)
# One box plot of PTS per sample, side by side, to look for anomalies/outliers
ggplot(data = merged_data, mapping = aes(x = Sample, y = PTS)) +
  geom_boxplot() +
  labs(
    title = "Points Distribution Across Samples",
    x = "Sample",
    y = "Points"
  ) +
  theme_minimal()

# Find common patterns across all subsamples.
#
# common_summary(df) condenses one sample into a single summarise() result:
#   avg_pts      - mean of the PTS column, ignoring NA
#   avg_ast      - mean of the AST column, ignoring NA
#   unique_teams - number of distinct values in the Tm column
# df must therefore provide the columns PTS, AST and Tm.
common_summary <- function(df) {
  summarise(
    df,
    avg_pts = mean(PTS, na.rm = TRUE),
    avg_ast = mean(AST, na.rm = TRUE),
    unique_teams = n_distinct(Tm)
  )
}
# Print the one-row summary (avg_pts, avg_ast, unique_teams) of every sample.
# Lines starting with "##" are the recorded output of the preceding call.
df_1 %>% common_summary()
## avg_pts avg_ast unique_teams
## 1 25.81455 3.883803 38
df_2 %>% common_summary()
## avg_pts avg_ast unique_teams
## 1 26.91667 3.706573 38
df_3 %>% common_summary()
## avg_pts avg_ast unique_teams
## 1 25.98474 3.805164 38
df_4 %>% common_summary()
## avg_pts avg_ast unique_teams
## 1 26.49178 3.626761 38
df_5 %>% common_summary()
## avg_pts avg_ast unique_teams
## 1 25.50587 3.640845 38
# Interpretation of Summary Statistics
# The wording is limited to what the summaries can support: all five samples
# are random draws from the same dataset, and an equal number of distinct
# teams says nothing about how evenly those teams are represented (the
# per-team counts printed earlier differ noticeably between samples).
cat("\nSummary Interpretation:\n")
##
## Summary Interpretation:
cat("The average points (avg_pts) are quite consistent across samples, as expected for random samples drawn from the same dataset.\n")
## The average points (avg_pts) are quite consistent across samples, as expected for random samples drawn from the same dataset.
cat("Assist averages (avg_ast) also show minimal variation across samples.\n")
## Assist averages (avg_ast) also show minimal variation across samples.
cat("The unique_teams count is the same in every sample, but per-team counts differ between samples, so it does not by itself rule out sampling bias.\n")
## The unique_teams count is the same in every sample, but per-team counts differ between samples, so it does not by itself rule out sampling bias.
# Visualization of common summary metrics
# Collect the one-row summary of every sample into a single table, tagging
# each row with the sample it describes.
common_summaries <- bind_rows(
  mutate(common_summary(df_1), Sample = "df_1"),
  mutate(common_summary(df_2), Sample = "df_2"),
  mutate(common_summary(df_3), Sample = "df_3"),
  mutate(common_summary(df_4), Sample = "df_4"),
  mutate(common_summary(df_5), Sample = "df_5")
)
# Reshape to long form: one row per (Sample, Metric) pair with its Value,
# which is the layout the grouped bar plot needs.
common_summaries_long <- pivot_longer(
  common_summaries,
  cols = c(avg_pts, avg_ast, unique_teams),
  names_to = "Metric",
  values_to = "Value"
)
# Grouped bars: avg_pts, avg_ast and unique_teams next to each other per sample.
# geom_col() draws bar heights straight from Value, like geom_bar(stat = "identity").
ggplot(
  data = common_summaries_long,
  mapping = aes(x = Sample, y = Value, fill = Metric)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Comparison of Key Metrics Across Samples",
    x = "Sample",
    y = "Value"
  ) +
  theme_minimal()
