Introduction

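This analysis explores NBA data to derive insights into team and player performance. We address questions such as: which teams have the highest average points, how points are distributed across the dataset, and which players recorded the highest point totals.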

Data Preparation

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Read the NBA dataset from the local CSV file into a data frame.
data <- read.csv(file = "C:/Statistics/nba.csv")

# Preview the first six rows to check the columns that were loaded.
head(data, n = 6L)
##       bbrID       Date  Tm Opp TRB AST STL BLK PTS GmSc  Season Playoffs Year
## 1 abdelal01 1993-03-16 BOS GSW  10   2   0   0  25 22.7 1992-93    false 1993
## 2 abdulma02 1991-04-02 DEN DAL   2   6   4   0  30 29.7 1990-91    false 1991
## 3 abdulta01 1998-04-19 SAC VAN   2   3   1   0  31 26.4 1997-98    false 1998
## 4 abdursh01 2001-11-23 ATL DET  12   5   2   1  50 46.0 2001-02    false 2002
## 5 abrinal01 2018-11-01 OKC CHO   2   0   0   0  25 17.1 2018-19    false 2019
## 6 achiupr01 2021-01-12 MIA PHI  13   3   0   1  17 16.9 2020-21    false 2021
##   GameIndex GmScMovingZ GmScMovingZTop2Delta      Date2 GmSc2 GmScMovingZ2
## 1       181        4.13                 0.24 1991-12-04  18.6         3.89
## 2        64        3.82                 0.64 1995-12-07  40.1         3.18
## 3        58        4.11                 1.67 1998-01-14  16.9         2.44
## 4       386        4.06                 0.84 2003-11-28  34.3         3.22
## 5       160        3.37                 0.18 2018-11-30  16.6         3.19
## 6         8        2.58                 0.05 2021-02-28  16.8         2.53

Summary Statistics

# Five-number summary plus mean of the PTS column.
summary(data[["PTS"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4.00   19.00   24.00   26.06   32.00   81.00
# Quartiles of PTS (minimum, 25%, median, 75%, maximum).
quantile(data[["PTS"]], probs = c(0, 0.25, 0.5, 0.75, 1))
##   0%  25%  50%  75% 100% 
##    4   19   24   32   81

Top 5 Teams with the Highest Average Points

# Mean PTS per team (missing values ignored), sorted from highest to
# lowest average; only the five leading teams are kept.
top_teams <- data |>
  group_by(Tm) |>
  summarise(AveragePoints = mean(PTS, na.rm = TRUE)) |>
  arrange(desc(AveragePoints)) |>
  slice_head(n = 5)

print(top_teams)
## # A tibble: 5 × 2
##   Tm    AveragePoints
##   <chr>         <dbl>
## 1 PHO            29.9
## 2 NOP            28.9
## 3 CHI            28.5
## 4 HOU            28.4
## 5 POR            27.8

Distribution of Points

# Histogram of PTS using bins that are 5 points wide.
ggplot(data = data, mapping = aes(x = PTS)) +
  geom_histogram(binwidth = 5, colour = "black", fill = "blue") +
  labs(title = "Distribution of Points")

Top 10 Players Based on Points

# Keep the ten rows with the largest PTS values.
top_players <- data |>
  arrange(desc(PTS)) |>
  head(n = 10)

# Bar chart of those rows: bars are ordered from highest to lowest PTS and
# filled per player ID; x-axis labels are tilted 45 degrees so they do not
# overlap.
ggplot(
  data = top_players,
  mapping = aes(x = reorder(bbrID, -PTS), y = PTS, fill = bbrID)
) +
  geom_col() +
  labs(
    title = "Top 10 Players Based on Points",
    x = "Player",
    y = "Points per Game"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Conclusion

The analysis provides insights into team performance and player attributes. Future analysis can expand on these findings by incorporating additional variables and statistical modeling.