---
title: "NBA Data Dive assignment_4"
author: "Sneha"
date: "2025-02-10"
---

Introduction

This analysis investigates five random samples of the NBA dataset, each drawn with replacement and sized at 50% of the rows in the original data. The goal is to explore the variability across these samples, identify anomalies, and assess data consistency.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Load Data

# Read the NBA game log from a local CSV file (the path is machine-specific).
nba_data <- read.csv(file = "C:/Statistics/nba.csv")
# Print rows x columns as a quick sanity check on the import.
dim(nba_data)
## [1] 1703   19

Creating Five Random Samples

# Draw five independent random samples, each sized at 50% of the rows.
# Sampling is done WITH replacement, so a row can appear more than once
# within a sample and the samples can overlap one another.
set.seed(123) # fixed seed so the five draws are reproducible
n_rows <- nrow(nba_data)
# floor() makes the size an explicit whole number; with an odd row count the
# product is fractional and sample() would otherwise truncate it silently.
sample_size <- floor(n_rows * 0.5)

# sample.int(n, size) draws row indices from 1..n directly, which avoids the
# 1:n idiom (it yields c(1, 0) when n is 0).
df_1 <- nba_data[sample.int(n_rows, sample_size, replace = TRUE), ]
df_2 <- nba_data[sample.int(n_rows, sample_size, replace = TRUE), ]
df_3 <- nba_data[sample.int(n_rows, sample_size, replace = TRUE), ]
df_4 <- nba_data[sample.int(n_rows, sample_size, replace = TRUE), ]
df_5 <- nba_data[sample.int(n_rows, sample_size, replace = TRUE), ]

Summary of Each Sample

# Apply summary() to each of the five samples; the result is an unnamed list,
# so the printed sections are labelled [[1]] through [[5]].
lapply(X = list(df_1, df_2, df_3, df_4, df_5), FUN = summary)
## [[1]]
##     bbrID               Date                Tm                Opp           
##  Length:851         Length:851         Length:851         Length:851        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       TRB              AST              STL             BLK        
##  Min.   : 0.000   Min.   : 0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.: 4.000   1st Qu.: 1.000   1st Qu.:1.000   1st Qu.:0.0000  
##  Median : 7.000   Median : 3.000   Median :1.000   Median :0.0000  
##  Mean   : 7.415   Mean   : 3.804   Mean   :1.719   Mean   :0.8766  
##  3rd Qu.:10.000   3rd Qu.: 5.000   3rd Qu.:2.000   3rd Qu.:1.0000  
##  Max.   :29.000   Max.   :18.000   Max.   :9.000   Max.   :9.0000  
##       PTS             GmSc          Season            Playoffs        
##  Min.   : 4.00   Min.   : 7.80   Length:851         Length:851        
##  1st Qu.:18.00   1st Qu.:18.60   Class :character   Class :character  
##  Median :24.00   Median :23.80   Mode  :character   Mode  :character  
##  Mean   :25.68   Mean   :24.89                                        
##  3rd Qu.:32.00   3rd Qu.:30.35                                        
##  Max.   :60.00   Max.   :51.50                                        
##       Year        GameIndex       GmScMovingZ    GmScMovingZTop2Delta
##  Min.   :1985   Min.   :   0.0   Min.   :2.190   Min.   :0.0000      
##  1st Qu.:1997   1st Qu.:  68.5   1st Qu.:3.200   1st Qu.:0.1400      
##  Median :2008   Median : 157.0   Median :3.600   Median :0.3300      
##  Mean   :2007   Mean   : 267.0   Mean   :3.648   Mean   :0.4724      
##  3rd Qu.:2016   3rd Qu.: 400.0   3rd Qu.:3.995   3rd Qu.:0.6500      
##  Max.   :2022   Max.   :1592.0   Max.   :6.750   Max.   :3.3600      
##     Date2               GmSc2        GmScMovingZ2  
##  Length:851         Min.   : 7.10   Min.   :1.880  
##  Class :character   1st Qu.:17.20   1st Qu.:2.840  
##  Mode  :character   Median :21.70   Median :3.200  
##                     Mean   :22.76   Mean   :3.176  
##                     3rd Qu.:27.75   3rd Qu.:3.490  
##                     Max.   :53.80   Max.   :4.910  
## 
## [[2]]
##     bbrID               Date                Tm                Opp           
##  Length:851         Length:851         Length:851         Length:851        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       TRB              AST              STL             BLK        
##  Min.   : 0.000   Min.   : 0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.: 4.000   1st Qu.: 1.000   1st Qu.:1.000   1st Qu.:0.0000  
##  Median : 7.000   Median : 3.000   Median :1.000   Median :0.0000  
##  Mean   : 7.323   Mean   : 3.646   Mean   :1.702   Mean   :0.9683  
##  3rd Qu.:10.000   3rd Qu.: 5.000   3rd Qu.:2.000   3rd Qu.:1.0000  
##  Max.   :26.000   Max.   :22.000   Max.   :7.000   Max.   :8.0000  
##       PTS             GmSc          Season            Playoffs        
##  Min.   : 4.00   Min.   : 8.70   Length:851         Length:851        
##  1st Qu.:18.00   1st Qu.:18.70   Class :character   Class :character  
##  Median :24.00   Median :23.80   Mode  :character   Mode  :character  
##  Mean   :25.75   Mean   :24.98                                        
##  3rd Qu.:32.00   3rd Qu.:30.00                                        
##  Max.   :81.00   Max.   :63.50                                        
##       Year        GameIndex       GmScMovingZ    GmScMovingZTop2Delta
##  Min.   :1985   Min.   :   0.0   Min.   :2.190   Min.   :0.0000      
##  1st Qu.:1997   1st Qu.:  72.5   1st Qu.:3.240   1st Qu.:0.1450      
##  Median :2008   Median : 148.0   Median :3.610   Median :0.3300      
##  Mean   :2007   Mean   : 253.9   Mean   :3.688   Mean   :0.5213      
##  3rd Qu.:2017   3rd Qu.: 353.0   3rd Qu.:4.070   3rd Qu.:0.7200      
##  Max.   :2022   Max.   :1455.0   Max.   :6.750   Max.   :3.7300      
##     Date2               GmSc2       GmScMovingZ2  
##  Length:851         Min.   : 7.5   Min.   :1.840  
##  Class :character   1st Qu.:16.7   1st Qu.:2.840  
##  Mode  :character   Median :21.2   Median :3.140  
##                     Mean   :22.4   Mean   :3.166  
##                     3rd Qu.:27.4   3rd Qu.:3.470  
##                     Max.   :50.8   Max.   :4.910  
## 
## [[3]]
##     bbrID               Date                Tm                Opp           
##  Length:851         Length:851         Length:851         Length:851        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       TRB              AST              STL              BLK        
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   :0.0000  
##  1st Qu.: 4.000   1st Qu.: 1.000   1st Qu.: 1.000   1st Qu.:0.0000  
##  Median : 7.000   Median : 3.000   Median : 1.000   Median :0.0000  
##  Mean   : 7.522   Mean   : 3.732   Mean   : 1.706   Mean   :0.9283  
##  3rd Qu.:10.000   3rd Qu.: 5.000   3rd Qu.: 3.000   3rd Qu.:1.0000  
##  Max.   :26.000   Max.   :22.000   Max.   :10.000   Max.   :9.0000  
##       PTS            GmSc          Season            Playoffs        
##  Min.   : 5.0   Min.   : 7.80   Length:851         Length:851        
##  1st Qu.:18.0   1st Qu.:18.90   Class :character   Class :character  
##  Median :25.0   Median :24.40   Mode  :character   Mode  :character  
##  Mean   :26.2   Mean   :25.45                                        
##  3rd Qu.:32.0   3rd Qu.:30.70                                        
##  Max.   :62.0   Max.   :60.20                                        
##       Year        GameIndex       GmScMovingZ    GmScMovingZTop2Delta
##  Min.   :1985   Min.   :   3.0   Min.   :2.190   Min.   :0.000       
##  1st Qu.:1997   1st Qu.:  71.5   1st Qu.:3.250   1st Qu.:0.150       
##  Median :2008   Median : 150.0   Median :3.630   Median :0.340       
##  Mean   :2007   Mean   : 250.7   Mean   :3.703   Mean   :0.521       
##  3rd Qu.:2017   3rd Qu.: 364.5   3rd Qu.:4.040   3rd Qu.:0.725       
##  Max.   :2022   Max.   :1455.0   Max.   :6.750   Max.   :3.360       
##     Date2               GmSc2       GmScMovingZ2  
##  Length:851         Min.   : 5.3   Min.   :1.880  
##  Class :character   1st Qu.:16.8   1st Qu.:2.845  
##  Mode  :character   Median :21.7   Median :3.170  
##                     Mean   :22.8   Mean   :3.182  
##                     3rd Qu.:27.8   3rd Qu.:3.470  
##                     Max.   :51.8   Max.   :4.630  
## 
## [[4]]
##     bbrID               Date                Tm                Opp           
##  Length:851         Length:851         Length:851         Length:851        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       TRB              AST             STL              BLK        
##  Min.   : 0.000   Min.   : 0.00   Min.   : 0.000   Min.   :0.0000  
##  1st Qu.: 4.000   1st Qu.: 1.00   1st Qu.: 1.000   1st Qu.:0.0000  
##  Median : 7.000   Median : 3.00   Median : 1.000   Median :0.0000  
##  Mean   : 7.345   Mean   : 3.74   Mean   : 1.747   Mean   :0.8437  
##  3rd Qu.:10.000   3rd Qu.: 5.00   3rd Qu.: 3.000   3rd Qu.:1.0000  
##  Max.   :29.000   Max.   :17.00   Max.   :10.000   Max.   :8.0000  
##       PTS             GmSc          Season            Playoffs        
##  Min.   : 6.00   Min.   : 6.40   Length:851         Length:851        
##  1st Qu.:19.00   1st Qu.:18.90   Class :character   Class :character  
##  Median :24.00   Median :24.20   Mode  :character   Mode  :character  
##  Mean   :26.17   Mean   :25.21                                        
##  3rd Qu.:32.00   3rd Qu.:30.40                                        
##  Max.   :62.00   Max.   :56.60                                        
##       Year        GameIndex       GmScMovingZ    GmScMovingZTop2Delta
##  Min.   :1985   Min.   :   1.0   Min.   :2.170   Min.   :0.0000      
##  1st Qu.:1999   1st Qu.:  77.0   1st Qu.:3.250   1st Qu.:0.1500      
##  Median :2009   Median : 149.0   Median :3.620   Median :0.3300      
##  Mean   :2008   Mean   : 248.7   Mean   :3.663   Mean   :0.4865      
##  3rd Qu.:2017   3rd Qu.: 353.5   3rd Qu.:3.980   3rd Qu.:0.6800      
##  Max.   :2022   Max.   :1592.0   Max.   :6.750   Max.   :3.7300      
##     Date2               GmSc2        GmScMovingZ2  
##  Length:851         Min.   : 6.20   Min.   :1.920  
##  Class :character   1st Qu.:16.90   1st Qu.:2.880  
##  Mode  :character   Median :21.40   Median :3.180  
##                     Mean   :22.54   Mean   :3.177  
##                     3rd Qu.:27.35   3rd Qu.:3.460  
##                     Max.   :53.80   Max.   :4.710  
## 
## [[5]]
##     bbrID               Date                Tm                Opp           
##  Length:851         Length:851         Length:851         Length:851        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       TRB              AST              STL             BLK        
##  Min.   : 0.000   Min.   : 0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.: 4.000   1st Qu.: 1.000   1st Qu.:1.000   1st Qu.:0.0000  
##  Median : 7.000   Median : 3.000   Median :1.000   Median :1.0000  
##  Mean   : 7.459   Mean   : 3.651   Mean   :1.699   Mean   :0.9718  
##  3rd Qu.:10.000   3rd Qu.: 5.000   3rd Qu.:3.000   3rd Qu.:1.0000  
##  Max.   :26.000   Max.   :19.000   Max.   :9.000   Max.   :8.0000  
##       PTS             GmSc          Season            Playoffs        
##  Min.   : 5.00   Min.   : 6.40   Length:851         Length:851        
##  1st Qu.:18.50   1st Qu.:18.70   Class :character   Class :character  
##  Median :24.00   Median :23.50   Mode  :character   Mode  :character  
##  Mean   :25.73   Mean   :24.99                                        
##  3rd Qu.:32.00   3rd Qu.:30.15                                        
##  Max.   :81.00   Max.   :63.50                                        
##       Year        GameIndex       GmScMovingZ    GmScMovingZTop2Delta
##  Min.   :1985   Min.   :   0.0   Min.   :2.220   Min.   :0.0000      
##  1st Qu.:1997   1st Qu.:  66.0   1st Qu.:3.265   1st Qu.:0.1400      
##  Median :2007   Median : 143.0   Median :3.610   Median :0.3400      
##  Mean   :2006   Mean   : 245.6   Mean   :3.691   Mean   :0.5021      
##  3rd Qu.:2016   3rd Qu.: 353.0   3rd Qu.:4.040   3rd Qu.:0.6950      
##  Max.   :2022   Max.   :1427.0   Max.   :6.010   Max.   :3.7300      
##     Date2               GmSc2        GmScMovingZ2  
##  Length:851         Min.   : 6.20   Min.   :1.840  
##  Class :character   1st Qu.:16.80   1st Qu.:2.880  
##  Mode  :character   Median :21.00   Median :3.170  
##                     Mean   :22.15   Mean   :3.189  
##                     3rd Qu.:26.50   3rd Qu.:3.470  
##                     Max.   :53.80   Max.   :4.910

Categorical Data Analysis

# Frequency of each team within every sample.
# The team column in this dataset is `Tm` (see the column headers in the
# summary output above). There is no `Team` column, so `df$Team` evaluated to
# NULL and table() printed "< table of extent 0 >" for every sample.
# NOTE: re-knit the document to regenerate the printed frequency tables.
table(df_1$Tm)
table(df_2$Tm)
table(df_3$Tm)
table(df_4$Tm)
table(df_5$Tm)

Numerical Data Analysis

# Five-number summary (plus mean) of points scored within every sample.
# The points column in this dataset is `PTS` (see the column headers in the
# summary output above). There is no `Points` column, so `df$Points` evaluated
# to NULL and summary() reported "Length 0 / Class NULL / Mode NULL".
# NOTE: re-knit the document to regenerate the printed summaries.
summary(df_1$PTS)
summary(df_2$PTS)
summary(df_3$PTS)
summary(df_4$PTS)
summary(df_5$PTS)

Comparison of Samples

# Histogram of points per game for the full (unsampled) dataset, using
# 2-point-wide bins.
ggplot(data = nba_data, mapping = aes(x = PTS)) +
  geom_histogram(binwidth = 2, fill = "blue", alpha = 0.5) +
  labs(title = "Distribution of Points in Original Dataset")

Insights & Conclusion

```