---
title: "NBA Data Dive assignment_4"
author: "Sneha"
date: "2025-02-10"
---
This analysis draws five random subsamples of the NBA dataset, each sampled with replacement and containing half as many rows as the full data. The goal is to explore the variability across these samples, identify anomalies, and assess data consistency.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Load the NBA dataset from the local CSV file (first row holds column names).
nba_data <- read.csv(file = "C:/Statistics/nba.csv", header = TRUE)
# Report how many rows and columns were read.
dim(nba_data)
## [1] 1703 19
# Draw five random subsamples, each half as long as the full dataset.
# The seed is fixed so the same rows are drawn on every run.
set.seed(123)

# nrow() * 0.5 is fractional when the row count is odd (1703 rows -> 851.5).
# sample() would silently truncate it, so round down explicitly.
sample_size <- floor(nrow(nba_data) * 0.5)

# seq_len() instead of 1:nrow() so an empty data frame gives an empty index
# rather than c(1, 0).
row_ids <- seq_len(nrow(nba_data))

# Rows are drawn with replacement, so a subsample may contain the same row
# more than once. The five draws stay in this order to keep the random
# number stream, and therefore the selected rows, unchanged.
df_1 <- nba_data[sample(row_ids, sample_size, replace = TRUE), ]
df_2 <- nba_data[sample(row_ids, sample_size, replace = TRUE), ]
df_3 <- nba_data[sample(row_ids, sample_size, replace = TRUE), ]
df_4 <- nba_data[sample(row_ids, sample_size, replace = TRUE), ]
df_5 <- nba_data[sample(row_ids, sample_size, replace = TRUE), ]

# Print per-column summaries of every subsample for side-by-side comparison.
lapply(list(df_1, df_2, df_3, df_4, df_5), summary)
## [[1]]
## bbrID Date Tm Opp
## Length:851 Length:851 Length:851 Length:851
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TRB AST STL BLK
## Min. : 0.000 Min. : 0.000 Min. :0.000 Min. :0.0000
## 1st Qu.: 4.000 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.:0.0000
## Median : 7.000 Median : 3.000 Median :1.000 Median :0.0000
## Mean : 7.415 Mean : 3.804 Mean :1.719 Mean :0.8766
## 3rd Qu.:10.000 3rd Qu.: 5.000 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :29.000 Max. :18.000 Max. :9.000 Max. :9.0000
## PTS GmSc Season Playoffs
## Min. : 4.00 Min. : 7.80 Length:851 Length:851
## 1st Qu.:18.00 1st Qu.:18.60 Class :character Class :character
## Median :24.00 Median :23.80 Mode :character Mode :character
## Mean :25.68 Mean :24.89
## 3rd Qu.:32.00 3rd Qu.:30.35
## Max. :60.00 Max. :51.50
## Year GameIndex GmScMovingZ GmScMovingZTop2Delta
## Min. :1985 Min. : 0.0 Min. :2.190 Min. :0.0000
## 1st Qu.:1997 1st Qu.: 68.5 1st Qu.:3.200 1st Qu.:0.1400
## Median :2008 Median : 157.0 Median :3.600 Median :0.3300
## Mean :2007 Mean : 267.0 Mean :3.648 Mean :0.4724
## 3rd Qu.:2016 3rd Qu.: 400.0 3rd Qu.:3.995 3rd Qu.:0.6500
## Max. :2022 Max. :1592.0 Max. :6.750 Max. :3.3600
## Date2 GmSc2 GmScMovingZ2
## Length:851 Min. : 7.10 Min. :1.880
## Class :character 1st Qu.:17.20 1st Qu.:2.840
## Mode :character Median :21.70 Median :3.200
## Mean :22.76 Mean :3.176
## 3rd Qu.:27.75 3rd Qu.:3.490
## Max. :53.80 Max. :4.910
##
## [[2]]
## bbrID Date Tm Opp
## Length:851 Length:851 Length:851 Length:851
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TRB AST STL BLK
## Min. : 0.000 Min. : 0.000 Min. :0.000 Min. :0.0000
## 1st Qu.: 4.000 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.:0.0000
## Median : 7.000 Median : 3.000 Median :1.000 Median :0.0000
## Mean : 7.323 Mean : 3.646 Mean :1.702 Mean :0.9683
## 3rd Qu.:10.000 3rd Qu.: 5.000 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :26.000 Max. :22.000 Max. :7.000 Max. :8.0000
## PTS GmSc Season Playoffs
## Min. : 4.00 Min. : 8.70 Length:851 Length:851
## 1st Qu.:18.00 1st Qu.:18.70 Class :character Class :character
## Median :24.00 Median :23.80 Mode :character Mode :character
## Mean :25.75 Mean :24.98
## 3rd Qu.:32.00 3rd Qu.:30.00
## Max. :81.00 Max. :63.50
## Year GameIndex GmScMovingZ GmScMovingZTop2Delta
## Min. :1985 Min. : 0.0 Min. :2.190 Min. :0.0000
## 1st Qu.:1997 1st Qu.: 72.5 1st Qu.:3.240 1st Qu.:0.1450
## Median :2008 Median : 148.0 Median :3.610 Median :0.3300
## Mean :2007 Mean : 253.9 Mean :3.688 Mean :0.5213
## 3rd Qu.:2017 3rd Qu.: 353.0 3rd Qu.:4.070 3rd Qu.:0.7200
## Max. :2022 Max. :1455.0 Max. :6.750 Max. :3.7300
## Date2 GmSc2 GmScMovingZ2
## Length:851 Min. : 7.5 Min. :1.840
## Class :character 1st Qu.:16.7 1st Qu.:2.840
## Mode :character Median :21.2 Median :3.140
## Mean :22.4 Mean :3.166
## 3rd Qu.:27.4 3rd Qu.:3.470
## Max. :50.8 Max. :4.910
##
## [[3]]
## bbrID Date Tm Opp
## Length:851 Length:851 Length:851 Length:851
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TRB AST STL BLK
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. :0.0000
## 1st Qu.: 4.000 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.:0.0000
## Median : 7.000 Median : 3.000 Median : 1.000 Median :0.0000
## Mean : 7.522 Mean : 3.732 Mean : 1.706 Mean :0.9283
## 3rd Qu.:10.000 3rd Qu.: 5.000 3rd Qu.: 3.000 3rd Qu.:1.0000
## Max. :26.000 Max. :22.000 Max. :10.000 Max. :9.0000
## PTS GmSc Season Playoffs
## Min. : 5.0 Min. : 7.80 Length:851 Length:851
## 1st Qu.:18.0 1st Qu.:18.90 Class :character Class :character
## Median :25.0 Median :24.40 Mode :character Mode :character
## Mean :26.2 Mean :25.45
## 3rd Qu.:32.0 3rd Qu.:30.70
## Max. :62.0 Max. :60.20
## Year GameIndex GmScMovingZ GmScMovingZTop2Delta
## Min. :1985 Min. : 3.0 Min. :2.190 Min. :0.000
## 1st Qu.:1997 1st Qu.: 71.5 1st Qu.:3.250 1st Qu.:0.150
## Median :2008 Median : 150.0 Median :3.630 Median :0.340
## Mean :2007 Mean : 250.7 Mean :3.703 Mean :0.521
## 3rd Qu.:2017 3rd Qu.: 364.5 3rd Qu.:4.040 3rd Qu.:0.725
## Max. :2022 Max. :1455.0 Max. :6.750 Max. :3.360
## Date2 GmSc2 GmScMovingZ2
## Length:851 Min. : 5.3 Min. :1.880
## Class :character 1st Qu.:16.8 1st Qu.:2.845
## Mode :character Median :21.7 Median :3.170
## Mean :22.8 Mean :3.182
## 3rd Qu.:27.8 3rd Qu.:3.470
## Max. :51.8 Max. :4.630
##
## [[4]]
## bbrID Date Tm Opp
## Length:851 Length:851 Length:851 Length:851
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TRB AST STL BLK
## Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. :0.0000
## 1st Qu.: 4.000 1st Qu.: 1.00 1st Qu.: 1.000 1st Qu.:0.0000
## Median : 7.000 Median : 3.00 Median : 1.000 Median :0.0000
## Mean : 7.345 Mean : 3.74 Mean : 1.747 Mean :0.8437
## 3rd Qu.:10.000 3rd Qu.: 5.00 3rd Qu.: 3.000 3rd Qu.:1.0000
## Max. :29.000 Max. :17.00 Max. :10.000 Max. :8.0000
## PTS GmSc Season Playoffs
## Min. : 6.00 Min. : 6.40 Length:851 Length:851
## 1st Qu.:19.00 1st Qu.:18.90 Class :character Class :character
## Median :24.00 Median :24.20 Mode :character Mode :character
## Mean :26.17 Mean :25.21
## 3rd Qu.:32.00 3rd Qu.:30.40
## Max. :62.00 Max. :56.60
## Year GameIndex GmScMovingZ GmScMovingZTop2Delta
## Min. :1985 Min. : 1.0 Min. :2.170 Min. :0.0000
## 1st Qu.:1999 1st Qu.: 77.0 1st Qu.:3.250 1st Qu.:0.1500
## Median :2009 Median : 149.0 Median :3.620 Median :0.3300
## Mean :2008 Mean : 248.7 Mean :3.663 Mean :0.4865
## 3rd Qu.:2017 3rd Qu.: 353.5 3rd Qu.:3.980 3rd Qu.:0.6800
## Max. :2022 Max. :1592.0 Max. :6.750 Max. :3.7300
## Date2 GmSc2 GmScMovingZ2
## Length:851 Min. : 6.20 Min. :1.920
## Class :character 1st Qu.:16.90 1st Qu.:2.880
## Mode :character Median :21.40 Median :3.180
## Mean :22.54 Mean :3.177
## 3rd Qu.:27.35 3rd Qu.:3.460
## Max. :53.80 Max. :4.710
##
## [[5]]
## bbrID Date Tm Opp
## Length:851 Length:851 Length:851 Length:851
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TRB AST STL BLK
## Min. : 0.000 Min. : 0.000 Min. :0.000 Min. :0.0000
## 1st Qu.: 4.000 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.:0.0000
## Median : 7.000 Median : 3.000 Median :1.000 Median :1.0000
## Mean : 7.459 Mean : 3.651 Mean :1.699 Mean :0.9718
## 3rd Qu.:10.000 3rd Qu.: 5.000 3rd Qu.:3.000 3rd Qu.:1.0000
## Max. :26.000 Max. :19.000 Max. :9.000 Max. :8.0000
## PTS GmSc Season Playoffs
## Min. : 5.00 Min. : 6.40 Length:851 Length:851
## 1st Qu.:18.50 1st Qu.:18.70 Class :character Class :character
## Median :24.00 Median :23.50 Mode :character Mode :character
## Mean :25.73 Mean :24.99
## 3rd Qu.:32.00 3rd Qu.:30.15
## Max. :81.00 Max. :63.50
## Year GameIndex GmScMovingZ GmScMovingZTop2Delta
## Min. :1985 Min. : 0.0 Min. :2.220 Min. :0.0000
## 1st Qu.:1997 1st Qu.: 66.0 1st Qu.:3.265 1st Qu.:0.1400
## Median :2007 Median : 143.0 Median :3.610 Median :0.3400
## Mean :2006 Mean : 245.6 Mean :3.691 Mean :0.5021
## 3rd Qu.:2016 3rd Qu.: 353.0 3rd Qu.:4.040 3rd Qu.:0.6950
## Max. :2022 Max. :1427.0 Max. :6.010 Max. :3.7300
## Date2 GmSc2 GmScMovingZ2
## Length:851 Min. : 6.20 Min. :1.840
## Class :character 1st Qu.:16.80 1st Qu.:2.880
## Mode :character Median :21.00 Median :3.170
## Mean :22.15 Mean :3.189
## 3rd Qu.:26.50 3rd Qu.:3.470
## Max. :53.80 Max. :4.910
# Count how many rows each team contributes to every subsample.
# The team column in this dataset is named `Tm`; the previous `Team` lookup
# returned NULL, so every call printed "< table of extent 0 >".
# Re-knit the document to regenerate the output for these calls.
table(df_1$Tm)
table(df_2$Tm)
table(df_3$Tm)
table(df_4$Tm)
table(df_5$Tm)
# Five-number summary (plus mean) of points scored in every subsample.
# The points column in this dataset is named `PTS`; the previous `Points`
# lookup returned NULL, so every call summarised an empty object.
# Re-knit the document to regenerate the output for these calls.
summary(df_1$PTS)
summary(df_2$PTS)
summary(df_3$PTS)
summary(df_4$PTS)
summary(df_5$PTS)
# Histogram of points in the full dataset (2-point bins), used as the
# reference distribution for the subsamples.
ggplot(data = nba_data, mapping = aes(x = PTS)) +
  geom_histogram(binwidth = 2, fill = "blue", alpha = 0.5) +
  labs(title = "Distribution of Points in Original Dataset")
```