# Set working directory and path to data
# NOTE(review): this path is machine-specific; the read.csv() calls below use
# absolute paths, so confirm whether anything still relies on this setwd().
setwd("C:/Users/LENOVO/Downloads/Regression Model") # Example path on Windows
# Clear the workspace
# Removes every object that ls() lists in the current environment.
rm(list = ls()) # Clear environment
# Run the garbage collector; it prints the memory-usage table shown below.
gc() # Clear unused memory
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 524161 28.0 1167410 62.4 660385 35.3
## Vcells 961094 7.4 8388608 64.0 1769879 13.6
cat("\f") # Clear the console
# Close every open graphics device to clear the charts.
# `dev.off` without parentheses only prints the function definition and closes
# nothing, and dev.off() stops with an error when only the null device is
# current. graphics.off() closes all open devices and is safe when none are open.
graphics.off() # Clear the charts
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the first team-statistics table (columns such as FG, FGA, PTS) and
# preview its first six rows.
df <- read.csv(
  file = "C:\\Users\\LENOVO\\Downloads\\Regression Model\\Project\\sportsref_download.csv"
)
head(df, n = 6L)
## Rk Team G MP FG FGA FG. X3P X3PA X3P. X2P X2PA
## 1 1 Milwaukee Bucks* 73 241.0 43.3 90.9 0.476 13.8 38.9 0.355 29.5 52.0
## 2 2 Houston Rockets* 72 241.4 40.8 90.4 0.451 15.6 45.3 0.345 25.1 45.2
## 3 3 Dallas Mavericks* 75 242.3 41.7 90.3 0.461 15.1 41.3 0.367 26.5 49.0
## 4 4 Los Angeles Clippers* 72 241.4 41.6 89.2 0.466 12.4 33.5 0.371 29.1 55.8
## 5 5 New Orleans Pelicans 72 242.1 42.6 91.6 0.465 13.6 36.9 0.370 28.9 54.8
## 6 6 Portland Trail Blazers* 74 241.0 42.2 91.2 0.463 12.9 34.1 0.377 29.3 57.1
## X2P. FT FTA FT. ORB DRB TRB AST STL BLK TOV PF PTS
## 1 0.567 18.3 24.7 0.742 9.5 42.2 51.7 25.9 7.2 5.9 15.1 19.6 118.7
## 2 0.557 20.6 26.1 0.791 9.8 34.5 44.3 21.6 8.7 5.2 14.7 21.8 117.8
## 3 0.541 18.6 23.8 0.779 10.5 36.4 46.9 24.7 6.1 4.8 12.7 19.5 117.0
## 4 0.522 20.8 26.3 0.791 10.7 37.0 47.7 23.7 7.1 4.7 14.6 22.1 116.3
## 5 0.528 17.1 23.4 0.729 11.1 35.4 46.5 26.8 7.5 5.0 16.4 21.2 115.8
## 6 0.514 17.7 22.1 0.804 10.2 35.1 45.3 20.6 6.3 6.1 12.8 21.7 115.0
# Load the second table for the same season (columns such as Age, W, L, Pace)
# and preview its first six rows.
df1 <- read.csv(
  file = "C:\\Users\\LENOVO\\Downloads\\sportsref_download.csv"
)
head(df1, n = 6L)
## Rk Team Age W L PW PL MOV SOS SRS ORtg DRtg NRtg
## 1 1 Milwaukee Bucks* 29.2 56 17 57 16 10.08 -0.67 9.41 112.4 102.9 9.5
## 2 2 Boston Celtics* 25.3 48 24 50 22 6.31 -0.47 5.83 113.3 107.0 6.3
## 3 3 Los Angeles Clippers* 27.4 49 23 50 22 6.44 0.21 6.66 113.9 107.6 6.3
## 4 4 Toronto Raptors* 26.6 53 19 50 22 6.24 -0.26 5.97 111.1 105.0 6.1
## 5 5 Los Angeles Lakers* 29.5 52 19 48 23 5.79 0.49 6.28 112.0 106.3 5.7
## 6 6 Dallas Mavericks* 26.1 43 32 49 26 4.95 -0.07 4.87 116.7 111.7 5.0
## Pace FTr X3PAr TS. X Offense.Four.Factors X.1 X.2 X.3 X.4
## 1 105.1 0.271 0.428 0.583 NA eFG% TOV% ORB% FT/FGA NA
## 2 99.5 0.259 0.386 0.570 NA 0.552 12.9 20.7 0.201 NA
## 3 101.5 0.295 0.375 0.577 NA 0.531 12.2 23.9 0.207 NA
## 4 100.9 0.264 0.421 0.574 NA 0.535 12.6 23.5 0.233 NA
## 5 100.9 0.276 0.358 0.573 NA 0.536 13.1 21.3 0.21 NA
## 6 99.3 0.264 0.457 0.581 NA 0.542 13.3 24.5 0.201 NA
## Defense.Four.Factors X.5 X.6 X.7 X.8 X.9 X.10 X.11
## 1 eFG% TOV% DRB% FT/FGA NA Arena Attend. Attend./G
## 2 0.489 12 81.6 0.178 NA Fiserv Forum 549,036 17,711
## 3 0.509 13.5 77.4 0.215 NA TD Garden 610,864 19,090
## 4 0.506 12.2 77.6 0.206 NA STAPLES Center 610,176 19,068
## 5 0.502 14.6 76.7 0.202 NA Scotiabank Arena 633,456 19,796
## 6 0.515 14.1 78.8 0.205 NA STAPLES Center 588,907 18,997
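I am going to modify the dataset with the mutate function and use gsub("\\*", "", .): this replaces every asterisk (*) with an empty string in all columns. This is typically done to clean data where asterisks are used to flag special cases; here they are appended to some team names (e.g. "Milwaukee Bucks*"), and removing them lets the team names match across tables. Note that gsub() returns text, so every column becomes a character column and has to be converted back to numeric later.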
# Remove the "*" marker from every cell of both tables so that team names such
# as "Milwaukee Bucks*" match across files. gsub() returns character vectors,
# so every column (including the numeric ones) is character after this step.
df <- mutate(df, across(everything(), function(col) gsub("\\*", "", col)))
df1 <- mutate(df1, across(everything(), function(col) gsub("\\*", "", col)))
# Show the column names of the cleaned statistics table.
names(df)
## [1] "Rk" "Team" "G" "MP" "FG" "FGA" "FG." "X3P" "X3PA" "X3P."
## [11] "X2P" "X2PA" "X2P." "FT" "FTA" "FT." "ORB" "DRB" "TRB" "AST"
## [21] "STL" "BLK" "TOV" "PF" "PTS"
# List the column names of the second table (df1); the output below is used
# to decide which columns to drop.
names(df1)
## [1] "Rk" "Team" "Age"
## [4] "W" "L" "PW"
## [7] "PL" "MOV" "SOS"
## [10] "SRS" "ORtg" "DRtg"
## [13] "NRtg" "Pace" "FTr"
## [16] "X3PAr" "TS." "X"
## [19] "Offense.Four.Factors" "X.1" "X.2"
## [22] "X.3" "X.4" "Defense.Four.Factors"
## [25] "X.5" "X.6" "X.7"
## [28] "X.8" "X.9" "X.10"
## [31] "X.11"
Here’s a breakdown of the variables in your NBA stats dataset, including their definitions:
These definitions cover the main statistical categories typically recorded in an NBA dataset, reflecting various aspects of player and team performance.
# Keep only the columns needed from df1: Team (the merge key) plus W (wins),
# L (losses), and Pace. Naming the columns to keep yields the same four
# columns as the former 27-name drop list, and does not break when the other
# headers of the source file change.
df_new <- subset(df1, select = c(Team, W, L, Pace))
head(df_new)
## Team W L Pace
## 1 Milwaukee Bucks 56 17 105.1
## 2 Boston Celtics 48 24 99.5
## 3 Los Angeles Clippers 49 23 101.5
## 4 Toronto Raptors 53 19 100.9
## 5 Los Angeles Lakers 52 19 100.9
## 6 Dallas Mavericks 43 32 99.3
# Join the statistics table with the W/L/Pace table on the cleaned team name
# (only teams present in both tables are kept), then list the columns.
merged_df <- merge(x = df, y = df_new, by = "Team")
names(merged_df)
## [1] "Team" "Rk" "G" "MP" "FG" "FGA" "FG." "X3P" "X3PA" "X3P."
## [11] "X2P" "X2PA" "X2P." "FT" "FTA" "FT." "ORB" "DRB" "TRB" "AST"
## [21] "STL" "BLK" "TOV" "PF" "PTS" "W" "L" "Pace"
# Drop Rk and the three X2P* columns, then tag every row with the season year.
df_2019 <- merged_df %>%
  subset(select = -c(Rk, X2P, X2PA, X2P.)) %>%
  mutate(Year = 2019)
names(df_2019)
## [1] "Team" "G" "MP" "FG" "FGA" "FG." "X3P" "X3PA" "X3P." "FT"
## [11] "FTA" "FT." "ORB" "DRB" "TRB" "AST" "STL" "BLK" "TOV" "PF"
## [21] "PTS" "W" "L" "Pace" "Year"
# Apply descriptive column names (positional: the order must match the 25
# columns listed above), then move Wins/Losses, Year, and Pace into place.
df_2019 <- df_2019 %>%
  setNames(c(
    "Team", "Games", "MinutesPlayed", "FieldGoal", "FieldGoalsAttempted",
    "FieldGoalsPercentage", "3Point", "3PointAttempted", "3PointPercentage",
    "FreeThrows", "FreeThrowsAttempted", "FreeThrowPercentage",
    "Offensive Rebounds", "Defensive Rebounds", "Total Rebounds", "Assists",
    "Steals", "Blocks", "Turnovers", "PersonalFouls", "Points", "Wins",
    "Losses", "Pace", "Year"
  )) %>%
  relocate(Wins, Losses, .before = MinutesPlayed) %>%
  relocate(Year, .before = Games) %>%
  relocate(Pace, .before = Points)
# Load the two tables for the next season (team statistics and the 2020-21
# file that holds W, L, and Pace), then preview the statistics table.
df21 <- read.csv(
  file = "C:\\Users\\LENOVO\\Downloads\\Regression Model\\Project\\sportsref_download (1).csv"
)
df2 <- read.csv(
  file = "C:\\Users\\LENOVO\\Downloads\\2020-21.csv"
)
head(df21, n = 6L)
## Rk Team G MP FG FGA FG. X3P X3PA X3P. X2P X2PA
## 1 1 Milwaukee Bucks* 72 240.7 44.7 91.8 0.487 14.4 37.1 0.389 30.3 54.7
## 2 2 Brooklyn Nets* 72 241.7 43.1 87.3 0.494 14.2 36.1 0.392 29.0 51.2
## 3 3 Washington Wizards* 72 241.7 43.2 90.9 0.475 10.2 29.0 0.351 33.0 61.9
## 4 4 Utah Jazz* 72 241.0 41.3 88.1 0.468 16.7 43.0 0.389 24.5 45.1
## 5 5 Portland Trail Blazers* 72 240.3 41.3 91.1 0.453 15.7 40.8 0.385 25.6 50.3
## 6 6 Phoenix Suns* 72 242.8 43.3 88.3 0.490 13.1 34.6 0.378 30.3 53.7
## X2P. FT FTA FT. ORB DRB TRB AST STL BLK TOV PF PTS
## 1 0.554 16.2 21.4 0.760 10.3 37.8 48.1 25.5 8.1 4.6 13.8 17.3 120.1
## 2 0.565 18.1 22.5 0.804 8.9 35.5 44.4 26.8 6.7 5.3 13.5 19.0 118.6
## 3 0.533 20.1 26.2 0.769 9.7 35.5 45.2 25.5 7.3 4.1 14.4 21.6 116.6
## 4 0.544 17.2 21.5 0.799 10.6 37.6 48.3 23.7 6.6 5.2 14.2 18.5 116.4
## 5 0.509 17.8 21.6 0.823 10.6 33.9 44.5 21.3 6.9 5.0 11.1 18.9 116.1
## 6 0.563 15.6 18.7 0.834 8.8 34.2 42.9 26.9 7.2 4.3 12.5 19.1 115.3
# Remove the "*" marker from every cell of both tables so team names match
# across files. gsub() returns character vectors, so all columns become
# character here.
df21 <- df21 %>%
  mutate(across(everything(), ~ gsub("\\*", "", .)))
df2 <- df2 %>%
  mutate(across(everything(), ~ gsub("\\*", "", .)))
# Keep only Team (the merge key), W, L, and Pace. Naming the columns to keep
# yields the same four columns as the former 27-name drop list, and does not
# break when the other headers of the source file change.
df_new1 <- subset(df2, select = c(Team, W, L, Pace))
head(df_new1)
## Team W L Pace
## 1 Utah Jazz 52 20 98.5
## 2 Los Angeles Clippers 47 25 96.9
## 3 Phoenix Suns 51 21 97.2
## 4 Milwaukee Bucks 46 26 102.2
## 5 Philadelphia 76ers 49 23 99.5
## 6 Denver Nuggets 47 25 97.1
# Join the statistics table with the W/L/Pace table on the cleaned team name
# and preview the result.
merged_df1 <- merge(x = df21, y = df_new1, by = "Team")
head(merged_df1, n = 6L)
## Team Rk G MP FG FGA FG. X3P X3PA X3P. X2P X2PA
## 1 Atlanta Hawks 11 72 241.7 40.8 87.2 0.468 12.4 33.4 0.373 28.4 53.9
## 2 Boston Celtics 16 72 241.4 41.5 88.9 0.466 13.6 36.4 0.374 27.9 52.5
## 3 Brooklyn Nets 2 72 241.7 43.1 87.3 0.494 14.2 36.1 0.392 29 51.2
## 4 Charlotte Hornets 23 72 241 39.9 87.8 0.455 13.7 37 0.369 26.3 50.8
## 5 Chicago Bulls 21 72 241.4 42.2 88.6 0.476 12.6 34 0.37 29.6 54.6
## 6 Cleveland Cavaliers 30 72 242.1 38.6 85.8 0.45 10 29.7 0.336 28.6 56
## X2P. FT FTA FT. ORB DRB TRB AST STL BLK TOV PF PTS W L Pace
## 1 0.526 19.7 24.2 0.812 10.6 35.1 45.6 24.1 7 4.8 13.2 19.3 113.7 41 31 97.6
## 2 0.53 16.1 20.8 0.775 10.6 33.6 44.3 23.5 7.7 5.3 14.1 20.4 112.6 36 36 98.3
## 3 0.565 18.1 22.5 0.804 8.9 35.5 44.4 26.8 6.7 5.3 13.5 19 118.6 48 24 99.5
## 4 0.517 15.9 20.9 0.761 10.6 33.2 43.8 26.8 7.8 4.8 14.8 18 109.5 33 39 98.3
## 5 0.542 13.8 17.5 0.791 9.6 35.3 45 26.8 6.7 4.2 15.1 18.9 110.7 31 41 99
## 6 0.51 16.7 22.4 0.743 10.4 32.3 42.8 23.8 7.8 4.5 15.5 18.2 103.8 22 50 97.3
# Drop Rk and the three X2P* columns, then tag every row with the season year.
df_2020 <- merged_df1 %>%
  subset(select = -c(Rk, X2P, X2PA, X2P.)) %>%
  mutate(Year = 2020)
head(df_2020, n = 6L)
## Team G MP FG FGA FG. X3P X3PA X3P. FT FTA FT.
## 1 Atlanta Hawks 72 241.7 40.8 87.2 0.468 12.4 33.4 0.373 19.7 24.2 0.812
## 2 Boston Celtics 72 241.4 41.5 88.9 0.466 13.6 36.4 0.374 16.1 20.8 0.775
## 3 Brooklyn Nets 72 241.7 43.1 87.3 0.494 14.2 36.1 0.392 18.1 22.5 0.804
## 4 Charlotte Hornets 72 241 39.9 87.8 0.455 13.7 37 0.369 15.9 20.9 0.761
## 5 Chicago Bulls 72 241.4 42.2 88.6 0.476 12.6 34 0.37 13.8 17.5 0.791
## 6 Cleveland Cavaliers 72 242.1 38.6 85.8 0.45 10 29.7 0.336 16.7 22.4 0.743
## ORB DRB TRB AST STL BLK TOV PF PTS W L Pace Year
## 1 10.6 35.1 45.6 24.1 7 4.8 13.2 19.3 113.7 41 31 97.6 2020
## 2 10.6 33.6 44.3 23.5 7.7 5.3 14.1 20.4 112.6 36 36 98.3 2020
## 3 8.9 35.5 44.4 26.8 6.7 5.3 13.5 19 118.6 48 24 99.5 2020
## 4 10.6 33.2 43.8 26.8 7.8 4.8 14.8 18 109.5 33 39 98.3 2020
## 5 9.6 35.3 45 26.8 6.7 4.2 15.1 18.9 110.7 31 41 99 2020
## 6 10.4 32.3 42.8 23.8 7.8 4.5 15.5 18.2 103.8 22 50 97.3 2020
# Apply descriptive column names (positional: the order must match the 25
# columns shown above), then move Wins/Losses, Year, and Pace into place.
df_2020 <- df_2020 %>%
  setNames(c(
    "Team", "Games", "MinutesPlayed", "FieldGoal", "FieldGoalsAttempted",
    "FieldGoalsPercentage", "3Point", "3PointAttempted", "3PointPercentage",
    "FreeThrows", "FreeThrowsAttempted", "FreeThrowPercentage",
    "Offensive Rebounds", "Defensive Rebounds", "Total Rebounds", "Assists",
    "Steals", "Blocks", "Turnovers", "PersonalFouls", "Points", "Wins",
    "Losses", "Pace", "Year"
  )) %>%
  relocate(Wins, Losses, .before = MinutesPlayed) %>%
  relocate(Year, .before = Games) %>%
  relocate(Pace, .before = Points)
# Combine datasets: stack the 2019 and 2020 season tables row-wise.
combined_data <- bind_rows(df_2019, df_2020)
# ---- Seasons 2021 and 2022 ----
# Both seasons go through identical cleaning steps, so the shared steps live in
# two small helpers and the column-name list is written only once.

# Remove the "*" marker from every cell of a table. gsub() returns character
# vectors, so all columns of the result are character.
strip_asterisks <- function(tbl) {
  tbl %>%
    mutate(across(everything(), ~ gsub("\\*", "", .)))
}

# Turn a merged season table into its final layout: drop Rk and the X2P*
# columns, add the Year column, apply the descriptive column names
# (positional, 25 names), and move Wins/Losses, Year, and Pace into place.
finalize_season <- function(merged, season_year) {
  season <- subset(merged, select = -c(Rk, X2P, X2PA, X2P.))
  season <- season %>% mutate(Year = season_year)
  names(season) <- c(
    "Team", "Games", "MinutesPlayed", "FieldGoal", "FieldGoalsAttempted",
    "FieldGoalsPercentage", "3Point", "3PointAttempted", "3PointPercentage",
    "FreeThrows", "FreeThrowsAttempted", "FreeThrowPercentage",
    "Offensive Rebounds", "Defensive Rebounds", "Total Rebounds", "Assists",
    "Steals", "Blocks", "Turnovers", "PersonalFouls", "Points", "Wins",
    "Losses", "Pace", "Year"
  )
  season %>%
    relocate(Wins, Losses, .before = MinutesPlayed) %>%
    relocate(Year, .before = Games) %>%
    relocate(Pace, .before = Points)
}

# Season tagged Year = 2021 (statistics file plus the 2021-22 file).
df31 <- strip_asterisks(
  read.csv("C:\\Users\\LENOVO\\Downloads\\Regression Model\\Project\\sportsref_download (2).csv")
)
df3 <- strip_asterisks(read.csv("C:\\Users\\LENOVO\\Downloads\\2021-22.csv"))
# Keep only Team (the merge key), W, L, and Pace. A keep list works even
# though this file's other headers differ from the other seasons' files.
df_new2 <- subset(df3, select = c(Team, W, L, Pace))
merged_df2 <- merge(df31, df_new2, by = "Team")
df_2021 <- finalize_season(merged_df2, 2021)

# Season tagged Year = 2022 (statistics file plus the 2022-23 file).
df41 <- strip_asterisks(
  read.csv("C:\\Users\\LENOVO\\Downloads\\Regression Model\\Project\\sportsref_download (3).csv")
)
df4 <- strip_asterisks(read.csv("C:\\Users\\LENOVO\\Downloads\\2022-23.csv"))
df_new4 <- subset(df4, select = c(Team, W, L, Pace))
merged_df3 <- merge(df41, df_new4, by = "Team")
df_2022 <- finalize_season(merged_df3, 2022)

# Combine datasets: append the two new seasons to the earlier ones.
combined_data <- bind_rows(combined_data, df_2021, df_2022)
I have merged the datasets for the 2019 to 2022 seasons, and the data is now ready for exploration, visualization, and regression modeling.
# Treat the two identifier columns as categorical variables, then inspect the
# structure of the resulting table.
data <- combined_data %>%
  mutate(across(c(Team, Year), as.factor))
str(data)
## 'data.frame': 120 obs. of 25 variables:
## $ Team : Factor w/ 30 levels "Atlanta Hawks",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Year : Factor w/ 4 levels "2019","2020",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Games : chr "67" "72" "72" "65" ...
## $ Wins : chr "20" "48" "35" "23" ...
## $ Losses : chr "47" "24" "37" "42" ...
## $ MinutesPlayed : chr "243" "242.1" "242.8" "242.3" ...
## $ FieldGoal : chr "40.6" "41.3" "40.4" "37.3" ...
## $ FieldGoalsAttempted : chr "90.6" "89.6" "90.3" "85.9" ...
## $ FieldGoalsPercentage: chr "0.449" "0.461" "0.448" "0.434" ...
## $ 3Point : chr "12" "12.6" "13.1" "12.1" ...
## $ 3PointAttempted : chr "36.1" "34.5" "38.1" "34.3" ...
## $ 3PointPercentage : chr "0.333" "0.364" "0.343" "0.352" ...
## $ FreeThrows : chr "18.5" "18.6" "17.9" "16.2" ...
## $ FreeThrowsAttempted : chr "23.4" "23.2" "24.1" "21.6" ...
## $ FreeThrowPercentage : chr "0.79" "0.801" "0.745" "0.748" ...
## $ Offensive Rebounds : chr "9.9" "10.7" "10.6" "11" ...
## $ Defensive Rebounds : chr "33.4" "35.4" "37.3" "31.8" ...
## $ Total Rebounds : chr "43.3" "46.1" "47.9" "42.8" ...
## $ Assists : chr "24" "23" "24.5" "23.8" ...
## $ Steals : chr "7.8" "8.3" "6.4" "6.6" ...
## $ Blocks : chr "5.1" "5.6" "4.5" "4.1" ...
## $ Turnovers : chr "16.2" "13.8" "15.3" "14.6" ...
## $ PersonalFouls : chr "23.1" "21.6" "21" "18.8" ...
## $ Pace : chr "103" "99.5" "101.4" "95.8" ...
## $ Points : chr "111.8" "113.7" "111.8" "102.9" ...
Columns like Games, Wins, Losses, MinutesPlayed, FieldGoal, etc., are stored as character types instead of numeric. This prevents us from performing mathematical operations or analyses directly on these columns, so we have to convert these columns to numeric.
# Convert columns to numeric
# Every column except the Team and Year factors holds numbers stored as text
# (a side effect of running gsub() over all columns earlier), so convert them
# all in one pass instead of one statement per column. This also removes the
# duplicated conversion of FieldGoalsPercentage.
numeric_cols <- names(data)[!names(data) %in% c("Team", "Year")]
data[numeric_cols] <- lapply(
  data[numeric_cols],
  function(col) as.numeric(as.character(col))
)
We need winning percentage for the y variable. Incorporating winning percentage into your analysis helps quantify the success of various strategies and provides a direct measure of team performance, which is essential for understanding the impact of 3-point shooting and other metrics.
Formula for calculating winning percentage: WP = Wins / (Wins + Losses).
# Winning percentage = wins divided by games decided (wins + losses), rounded
# to three decimals; then re-inspect the structure.
data[["WinningPercentage"]] <- with(
  data,
  round(Wins / (Wins + Losses), digits = 3)
)
str(data)
## 'data.frame': 120 obs. of 26 variables:
## $ Team : Factor w/ 30 levels "Atlanta Hawks",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Year : Factor w/ 4 levels "2019","2020",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Games : num 67 72 72 65 65 65 75 73 66 65 ...
## $ Wins : num 20 48 35 23 22 19 43 46 20 15 ...
## $ Losses : num 47 24 37 42 43 46 32 27 46 50 ...
## $ MinutesPlayed : num 243 242 243 242 241 ...
## $ FieldGoal : num 40.6 41.3 40.4 37.3 39.6 40.3 41.7 42 39.3 38.6 ...
## $ FieldGoalsAttempted : num 90.6 89.6 90.3 85.9 88.6 87.9 90.3 88.9 85.7 88.2 ...
## $ FieldGoalsPercentage: num 0.449 0.461 0.448 0.434 0.447 0.458 0.461 0.473 0.459 0.438 ...
## $ 3Point : num 12 12.6 13.1 12.1 12.2 11.2 15.1 11 12 10.4 ...
## $ 3PointAttempted : num 36.1 34.5 38.1 34.3 35.1 31.8 41.3 30.6 32.7 31.3 ...
## $ 3PointPercentage : num 0.333 0.364 0.343 0.352 0.348 0.351 0.367 0.359 0.367 0.334 ...
## $ FreeThrows : num 18.5 18.6 17.9 16.2 15.5 15.1 18.6 16.2 16.6 18.7 ...
## $ FreeThrowsAttempted : num 23.4 23.2 24.1 21.6 20.5 19.9 23.8 20.9 22.4 23.2 ...
## $ FreeThrowPercentage : num 0.79 0.801 0.745 0.748 0.755 0.758 0.779 0.777 0.743 0.803 ...
## $ Offensive Rebounds : num 9.9 10.7 10.6 11 10.5 10.8 10.5 10.8 9.8 10 ...
## $ Defensive Rebounds : num 33.4 35.4 37.3 31.8 31.4 33.4 36.4 33.4 32 32.9 ...
## $ Total Rebounds : num 43.3 46.1 47.9 42.8 41.9 44.2 46.9 44.1 41.7 42.8 ...
## $ Assists : num 24 23 24.5 23.8 23.2 23.1 24.7 26.7 24.1 25.6 ...
## $ Steals : num 7.8 8.3 6.4 6.6 10 6.9 6.1 8 7.4 8.2 ...
## $ Blocks : num 5.1 5.6 4.5 4.1 4.1 3.2 4.8 4.6 4.5 4.6 ...
## $ Turnovers : num 16.2 13.8 15.3 14.6 15.5 16.5 12.7 13.8 15.3 14.9 ...
## $ PersonalFouls : num 23.1 21.6 21 18.8 21.8 18.3 19.5 20.3 19.7 20.1 ...
## $ Pace : num 103 99.5 101.4 95.8 99.7 ...
## $ Points : num 112 114 112 103 107 ...
## $ WinningPercentage : num 0.299 0.667 0.486 0.354 0.338 0.292 0.573 0.63 0.303 0.231 ...
We can see that every variable in the dataset is now numeric, with the exception of the Team and Year variables, so we can analyze this dataset directly.
# Report the size of the combined table (rows, then columns).
num_rows <- nrow(data)
num_cols <- ncol(data)
dim_info <- c(num_rows, num_cols)
cat("Dimension of training set: Number of rows:", num_rows, ", Number of cols:", num_cols, "\n")
## Dimension of training set: Number of rows: 120 , Number of cols: 26
# Count the missing values in each column (one integer per column).
vapply(data, function(col) sum(is.na(col)), integer(1))
## Team Year Games
## 0 0 0
## Wins Losses MinutesPlayed
## 0 0 0
## FieldGoal FieldGoalsAttempted FieldGoalsPercentage
## 0 0 0
## 3Point 3PointAttempted 3PointPercentage
## 0 0 0
## FreeThrows FreeThrowsAttempted FreeThrowPercentage
## 0 0 0
## Offensive Rebounds Defensive Rebounds Total Rebounds
## 0 0 0
## Assists Steals Blocks
## 0 0 0
## Turnovers PersonalFouls Pace
## 0 0 0
## Points WinningPercentage
## 0 0
The dataset contains no missing values.
# Histograms of points, 3-point percentage, and pace across all team-seasons.
hist(
  data[["Points"]],
  main = "Distribution of Points", xlab = "Points", col = "lightblue"
)
hist(
  data[["3PointPercentage"]],
  main = "Distribution of 3-Point Percentage", xlab = "3-Point Percentage",
  col = "lightgreen"
)
hist(
  data[["Pace"]],
  main = "Distribution of Pace", xlab = "Pace", col = "lightblue"
)
Distribution Of Points:
From the histogram, the distribution has a regular, roughly bell-shaped (normal) form, which may indicate a consistent pattern in the distribution of points. This could suggest that most teams score within a specific range, showing a regularity in scoring performance across the dataset.
Range: The spread from 105 to 120 points indicates that the majority of the teams score between these values. This range represents the central cluster of scoring performances.
The center of the distribution at 112 points indicates that the average or most common point value for the teams in your dataset is around 112. This suggests that a typical team’s score is close to this value.
Frequency: The peak between 112 and 114 indicates that this is the most common range of point values among the teams in the dataset; that is, more teams score about 113 points than any other value. A few teams score above 120 points and a few score below 105, both with low frequency.
Distribution Of 3 Point Percentage
Distribution Type: The graph shows a peak at 0.36 and a central tendency around 0.35 to 0.36, which, combined with the spread, suggests a normal-like, single-peaked distribution. This indicates that most teams have shooting percentages clustered around 0.35 to 0.36, with decreasing frequencies as you move away from this center.
Frequency: With the highest frequency at 0.36 and a central spread, it appears that most teams have a 3-point shooting percentage close to this value.
Spread: The range from 0.32 to 0.42 shows there is some variability in shooting accuracy, but most teams are concentrated around the center.
Gap: Apart from the central peak, the histogram shows a notable gap between 0.40 and 0.41, suggesting that 3-point percentages in this range are less common.
Distribution of Pace
From the histogram, the distribution has a regular, roughly bell-shaped (normal) form, which may indicate a consistent pattern in the distribution of pace. This could suggest that most teams play within a specific range of pace, showing a regularity in tempo across the dataset.
Central Tendency: With a center at 100, the typical pace for most teams is around this value. Spread: The range from 96 to 106 indicates overall variability in team paces.
Peak Frequency: The highest frequency being above 30 suggests a common pace range where most teams fall.
# Pairwise correlations between all numeric columns (Team and Year are
# factors and are excluded by the is.numeric filter).
cor(data[vapply(data, is.numeric, logical(1))])
## Games Wins Losses MinutesPlayed
## Games 1.00000000 0.35056640 0.16548766 -0.0736630731
## Wins 0.35056640 1.00000000 -0.86561035 -0.0119739236
## Losses 0.16548766 -0.86561035 1.00000000 -0.0267748017
## MinutesPlayed -0.07366307 -0.01197392 -0.02677480 1.0000000000
## FieldGoal 0.16654693 0.46888559 -0.40471129 0.0965369625
## FieldGoalsAttempted -0.07677444 -0.08446756 0.04790041 0.0606344998
## FieldGoalsPercentage 0.26224370 0.62545067 -0.51841648 0.0693477067
## 3Point 0.02904166 0.32638352 -0.32816784 -0.1134767325
## 3PointAttempted 0.06096423 0.13818450 -0.11291939 -0.0935726141
## 3PointPercentage -0.06384716 0.55263213 -0.61607950 -0.0770116133
## FreeThrows 0.10904503 0.12132897 -0.06946355 0.1420142517
## FreeThrowsAttempted 0.06207931 -0.02200901 0.05636696 0.1646469922
## FreeThrowPercentage 0.11235074 0.33657184 -0.29435551 -0.0308783114
## Offensive Rebounds 0.13671539 -0.08920448 0.16703051 0.0225641970
## Defensive Rebounds -0.20248278 0.37937697 -0.50775616 -0.0149966644
## Total Rebounds -0.09469480 0.27906993 -0.34450019 -0.0003796438
## Assists 0.14907767 0.27963754 -0.21476542 0.0735614919
## Steals -0.13772111 0.02213433 -0.09694066 -0.0672783012
## Blocks -0.09124210 0.10554033 -0.15992061 -0.0195431712
## Turnovers -0.20538781 -0.38278783 0.29328069 0.0185089966
## PersonalFouls -0.11234372 -0.15991919 0.10833695 0.0506363688
## Pace -0.24568161 -0.19439286 0.07335025 -0.0515228546
## Points 0.18913233 0.55511615 -0.48344028 0.0911148196
## WinningPercentage 0.12592062 0.97039636 -0.95454297 -0.0036491199
## FieldGoal FieldGoalsAttempted FieldGoalsPercentage
## Games 1.665469e-01 -0.076774442 0.262243696
## Wins 4.688856e-01 -0.084467563 0.625450669
## Losses -4.047113e-01 0.047900413 -0.518416485
## MinutesPlayed 9.653696e-02 0.060634500 0.069347707
## FieldGoal 1.000000e+00 0.553326142 0.751654038
## FieldGoalsAttempted 5.533261e-01 1.000000000 -0.132665344
## FieldGoalsPercentage 7.516540e-01 -0.132665344 1.000000000
## 3Point 5.757118e-02 0.004111025 0.062692270
## 3PointAttempted -8.725342e-02 0.090740749 -0.176924744
## 3PointPercentage 3.492564e-01 -0.197798081 0.568688797
## FreeThrows 3.476683e-02 -0.107776536 0.131539961
## FreeThrowsAttempted -6.661965e-05 -0.027137848 0.025701217
## FreeThrowPercentage 1.059129e-01 -0.174753526 0.265891657
## Offensive Rebounds 1.331960e-01 0.570253871 -0.292119434
## Defensive Rebounds 2.406382e-01 0.199545978 0.124652774
## Total Rebounds 2.869751e-01 0.499089378 -0.056511958
## Assists 5.778940e-01 0.193836386 0.536547182
## Steals 5.788495e-02 0.287982906 -0.150933876
## Blocks 2.191639e-01 0.324480254 0.007775403
## Turnovers -1.783437e-01 -0.088455531 -0.138979229
## PersonalFouls -4.143756e-02 0.141559348 -0.157539965
## Pace 4.239504e-01 0.643131632 -0.002120982
## Points 8.509588e-01 0.414429414 0.685548513
## WinningPercentage 4.595310e-01 -0.069302222 0.601337077
## 3Point 3PointAttempted 3PointPercentage FreeThrows
## Games 0.029041660 0.0609642342 -0.063847157 0.109045026
## Wins 0.326383524 0.1381844968 0.552632127 0.121328974
## Losses -0.328167841 -0.1129193918 -0.616079502 -0.069463548
## MinutesPlayed -0.113476732 -0.0935726141 -0.077011613 0.142014252
## FieldGoal 0.057571177 -0.0872534225 0.349256428 0.034766827
## FieldGoalsAttempted 0.004111025 0.0907407491 -0.197798081 -0.107776536
## FieldGoalsPercentage 0.062692270 -0.1769247438 0.568688797 0.131539961
## 3Point 1.000000000 0.9268438232 0.516853672 -0.049810750
## 3PointAttempted 0.926843823 1.0000000000 0.160344773 -0.054401321
## 3PointPercentage 0.516853672 0.1603447728 1.000000000 0.001928545
## FreeThrows -0.049810750 -0.0544013208 0.001928545 1.000000000
## FreeThrowsAttempted -0.114946319 -0.0475302933 -0.186914245 0.899011681
## FreeThrowPercentage 0.151303632 -0.0112953980 0.427352797 0.272921314
## Offensive Rebounds -0.157865860 -0.0366723594 -0.345577459 -0.018589952
## Defensive Rebounds 0.229562815 0.1810205509 0.193510388 -0.044613101
## Total Rebounds 0.114758806 0.1412046416 -0.026332617 -0.051659767
## Assists 0.081282109 -0.0103852838 0.232323382 -0.181804901
## Steals -0.154265336 -0.0905540269 -0.180509333 -0.047201779
## Blocks -0.005267099 -0.0008565708 0.003022284 0.040783106
## Turnovers 0.009229798 0.1140300440 -0.237517231 -0.027382245
## PersonalFouls 0.083216961 0.1201561358 -0.039026639 0.277551472
## Pace 0.077759360 0.1595909904 -0.150766400 0.194886427
## Points 0.415590772 0.2687918965 0.483053081 0.384789907
## WinningPercentage 0.340454983 0.1311695232 0.606037585 0.106429381
## FreeThrowsAttempted FreeThrowPercentage Offensive Rebounds
## Games 6.207931e-02 0.112350740 0.13671539
## Wins -2.200901e-02 0.336571842 -0.08920448
## Losses 5.636696e-02 -0.294355506 0.16703051
## MinutesPlayed 1.646470e-01 -0.030878311 0.02256420
## FieldGoal -6.661965e-05 0.105912883 0.13319596
## FieldGoalsAttempted -2.713785e-02 -0.174753526 0.57025387
## FieldGoalsPercentage 2.570122e-02 0.265891657 -0.29211943
## 3Point -1.149463e-01 0.151303632 -0.15786586
## 3PointAttempted -4.753029e-02 -0.011295398 -0.03667236
## 3PointPercentage -1.869142e-01 0.427352797 -0.34557746
## FreeThrows 8.990117e-01 0.272921314 -0.01858995
## FreeThrowsAttempted 1.000000e+00 -0.173805690 0.14396041
## FreeThrowPercentage -1.738057e-01 1.000000000 -0.36040152
## Offensive Rebounds 1.439604e-01 -0.360401515 1.00000000
## Defensive Rebounds 2.844401e-03 -0.081731062 -0.07397263
## Total Rebounds 8.163483e-02 -0.275194342 0.50637124
## Assists -2.108982e-01 0.065843805 -0.06092196
## Steals -5.011197e-03 -0.109884211 0.23200823
## Blocks 1.013238e-01 -0.121217409 0.10008418
## Turnovers 1.624770e-01 -0.419801707 0.07106108
## PersonalFouls 2.875565e-01 -0.008965285 0.06838577
## Pace 3.092184e-01 -0.232708414 0.05661767
## Points 2.938855e-01 0.246224347 0.04376516
## WinningPercentage -3.444408e-02 0.331857865 -0.13382041
## Defensive Rebounds Total Rebounds Assists Steals
## Games -0.202482776 -0.0946947979 0.14907767 -0.137721114
## Wins 0.379376970 0.2790699285 0.27963754 0.022134326
## Losses -0.507756161 -0.3445001947 -0.21476542 -0.096940663
## MinutesPlayed -0.014996664 -0.0003796438 0.07356149 -0.067278301
## FieldGoal 0.240638246 0.2869750844 0.57789402 0.057884954
## FieldGoalsAttempted 0.199545978 0.4990893777 0.19383639 0.287982906
## FieldGoalsPercentage 0.124652774 -0.0565119577 0.53654718 -0.150933876
## 3Point 0.229562815 0.1147588060 0.08128211 -0.154265336
## 3PointAttempted 0.181020551 0.1412046416 -0.01038528 -0.090554027
## 3PointPercentage 0.193510388 -0.0263326170 0.23232338 -0.180509333
## FreeThrows -0.044613101 -0.0516597666 -0.18180490 -0.047201779
## FreeThrowsAttempted 0.002844401 0.0816348289 -0.21089815 -0.005011197
## FreeThrowPercentage -0.081731062 -0.2751943417 0.06584381 -0.109884211
## Offensive Rebounds -0.073972633 0.5063712355 -0.06092196 0.232008229
## Defensive Rebounds 1.000000000 0.8220851953 0.05649846 -0.187626494
## Total Rebounds 0.822085195 1.0000000000 0.01700301 -0.028361971
## Assists 0.056498459 0.0170030150 1.00000000 0.159270925
## Steals -0.187626494 -0.0283619710 0.15927092 1.000000000
## Blocks 0.218673529 0.2492252353 0.03115317 0.263280848
## Turnovers 0.043784433 0.0777352639 0.14825071 0.056938574
## PersonalFouls -0.210161597 -0.1448027757 -0.01489154 0.266601909
## Pace 0.335418478 0.3220034428 0.24930974 0.212029893
## Points 0.269041126 0.2614031768 0.43563222 -0.026772406
## WinningPercentage 0.464435805 0.3265753374 0.25209393 0.048413352
## Blocks Turnovers PersonalFouls Pace
## Games -0.0912421045 -0.205387814 -0.112343717 -0.245681607
## Wins 0.1055403270 -0.382787829 -0.159919194 -0.194392859
## Losses -0.1599206053 0.293280695 0.108336954 0.073350253
## MinutesPlayed -0.0195431712 0.018508997 0.050636369 -0.051522855
## FieldGoal 0.2191638690 -0.178343727 -0.041437563 0.423950425
## FieldGoalsAttempted 0.3244802542 -0.088455531 0.141559348 0.643131632
## FieldGoalsPercentage 0.0077754032 -0.138979229 -0.157539965 -0.002120982
## 3Point -0.0052670994 0.009229798 0.083216961 0.077759360
## 3PointAttempted -0.0008565708 0.114030044 0.120156136 0.159590990
## 3PointPercentage 0.0030222845 -0.237517231 -0.039026639 -0.150766400
## FreeThrows 0.0407831064 -0.027382245 0.277551472 0.194886427
## FreeThrowsAttempted 0.1013237806 0.162477017 0.287556537 0.309218408
## FreeThrowPercentage -0.1212174089 -0.419801707 -0.008965285 -0.232708414
## Offensive Rebounds 0.1000841807 0.071061084 0.068385771 0.056617666
## Defensive Rebounds 0.2186735287 0.043784433 -0.210161597 0.335418478
## Total Rebounds 0.2492252353 0.077735264 -0.144802776 0.322003443
## Assists 0.0311531727 0.148250706 -0.014891541 0.249309741
## Steals 0.2632808484 0.056938574 0.266601909 0.212029893
## Blocks 1.0000000000 -0.079160946 0.149110494 0.297072043
## Turnovers -0.0791609455 1.000000000 0.326441904 0.414433064
## PersonalFouls 0.1491104938 0.326441904 1.000000000 0.357579822
## Pace 0.2970720433 0.414433064 0.357579822 1.000000000
## Points 0.1951790931 -0.151385860 0.100499506 0.449658357
## WinningPercentage 0.1447945917 -0.354136915 -0.136273424 -0.137718111
## Points WinningPercentage
## Games 0.18913233 0.12592062
## Wins 0.55511615 0.97039636
## Losses -0.48344028 -0.95454297
## MinutesPlayed 0.09111482 -0.00364912
## FieldGoal 0.85095883 0.45953104
## FieldGoalsAttempted 0.41442941 -0.06930222
## FieldGoalsPercentage 0.68554851 0.60133708
## 3Point 0.41559077 0.34045498
## 3PointAttempted 0.26879190 0.13116952
## 3PointPercentage 0.48305308 0.60603759
## FreeThrows 0.38478991 0.10642938
## FreeThrowsAttempted 0.29388553 -0.03444408
## FreeThrowPercentage 0.24622435 0.33185786
## Offensive Rebounds 0.04376516 -0.13382041
## Defensive Rebounds 0.26904113 0.46443580
## Total Rebounds 0.26140318 0.32657534
## Assists 0.43563222 0.25209393
## Steals -0.02677241 0.04841335
## Blocks 0.19517909 0.14479459
## Turnovers -0.15138586 -0.35413692
## PersonalFouls 0.10049951 -0.13627342
## Pace 0.44965836 -0.13771811
## Points 1.00000000 0.54703020
## WinningPercentage 0.54703020 1.00000000
Wins and WinningPercentage:
There is a strong positive correlation (0.97) between the number of wins and the winning percentage. This makes sense because as the number of wins increases, the winning percentage naturally increases as well.
FieldGoal, FieldGoalsPercentage and Points:
Field goals made per game are strongly positively correlated with points scored (0.85), and field goal percentage also shows a fairly strong positive correlation with points (0.69). Teams that make more of their shots tend to score more points.
3PointAttempts and 3PointPercentage:
The correlation between 3-point attempts and 3-point percentage is relatively low (0.16). This suggests that a team’s volume of 3-point attempts doesn’t strongly predict their accuracy in 3-point shooting.
Pace and FieldGoalsAttempted:
There is a moderate positive correlation (0.64) between pace and field goals attempted. Teams that play faster (higher pace) tend to attempt more field goals.
MinutesPlayed, Pace and Points:
Minutes played and points scored are only weakly correlated (0.09), so minutes played is not a useful predictor of scoring. The moderate positive correlation of 0.45 is between pace and points: teams that play faster tend to score more, though pace is not a perfect predictor.
FieldGoal and FieldGoalsPercentage:
A strong positive correlation (0.75) between field goal percentage and field goals made suggests that teams that are more accurate in their shooting will make more field goals.
Losses and WinningPercentage:
The number of losses has a very strong negative correlation (-0.95) with the winning percentage, indicating that as losses increase, the winning percentage decreases significantly.
3PointPercentage and WinningPercentage:
There is a moderate positive correlation (0.61) between 3-point shooting percentage and winning percentage, suggesting that teams that shoot well from beyond the arc are more likely to win.
Notable Observations:
FieldGoalsAttempted and FieldGoalsPercentage have a negative correlation (-0.13), meaning that a higher number of attempts doesn’t necessarily correlate with a higher percentage. This might indicate variability in shooting accuracy based on shot volume.
Pace is essentially uncorrelated with field goal percentage (-0.002) and has only a weak negative correlation with 3-point percentage (-0.15), suggesting that playing faster doesn’t strongly impact shooting accuracy.
Points and WinningPercentage are moderately positively correlated (0.55), which is intuitive as teams that score more points are more likely to win games.
# Scatter plot of 3-point percentage (y) against winning percentage (x),
# overlaid in red with the fitted least-squares line.
# The column name starts with a digit, so it has to be quoted with backticks.
plot(`3PointPercentage` ~ WinningPercentage, data = data,
     main = "3-Point Percentage vs Winning Percentage",
     xlab = "Winning Percentage", ylab = "3-Point Percentage")
abline(lm(`3PointPercentage` ~ WinningPercentage, data = data), col = "red")
The scatter plot shows a positive trend, meaning as the winning percentage increases, the 3-point field goal percentage also tends to increase.
# Scatter-plot matrix of points, 3-point percentage, field goal percentage
# and pace.
pairs(
  data[c("Points", "3PointPercentage", "FieldGoalsPercentage", "Pace")],
  main = "Pairwise Plot"
)
Average Metrics by Year:
library(dplyr)
# Per-season mean of every numeric column.
# `na.rm = TRUE` is supplied through an anonymous function because passing
# extra arguments via the `...` of across() is deprecated as of dplyr 1.1.0.
data %>%
  group_by(Year) %>%
  summarise(across(where(is.numeric), \(x) mean(x, na.rm = TRUE)))
## # A tibble: 4 × 25
## Year Games Wins Losses MinutesPlayed FieldGoal FieldGoalsAttempted
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2019 70.6 35.3 35.3 242. 40.8 88.8
## 2 2020 72 36 36 241. 41.2 88.4
## 3 2021 82 41 41 241. 40.6 88.1
## 4 2022 82 41 41 242. 42.0 88.3
## # ℹ 18 more variables: FieldGoalsPercentage <dbl>, `3Point` <dbl>,
## # `3PointAttempted` <dbl>, `3PointPercentage` <dbl>, FreeThrows <dbl>,
## # FreeThrowsAttempted <dbl>, FreeThrowPercentage <dbl>,
## # `Offensive Rebounds` <dbl>, `Defensive Rebounds` <dbl>,
## # `Total Rebounds` <dbl>, Assists <dbl>, Steals <dbl>, Blocks <dbl>,
## # Turnovers <dbl>, PersonalFouls <dbl>, Pace <dbl>, Points <dbl>,
## # WinningPercentage <dbl>
The average number of games per team increased from 70.6 in 2019 and 72 in 2020 to 82 in 2021 and 2022. This could be due to changes in the season length or adjustments for the pandemic.
Wins and Losses:
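Average wins and losses are equal in every year (35.3, 36, 41 and 41), because every game produces exactly one win and one loss. The higher averages in 2021 and 2022 therefore reflect the larger number of games played, not a more balanced performance than in 2019 and 2020.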
Minutes Played:
The minutes played per game remain fairly consistent across the years (about 241–242), with a slight decrease in 2020 and 2021 compared to 2019 and a small increase in 2022.
Field Goals and 3-Point Statistics:
Field Goals: Field goals made per game increased slightly from 40.8 in 2019 to 42.0 in 2022 (with a dip to 40.6 in 2021), while field goals attempted decreased slightly from 88.8 to 88.3.
3-Point Statistics: There is a slight increase in 3-point field goals and percentage over the years, indicating a growing emphasis on the 3-point shot.
Pace:
The pace has slightly decreased from 2019 to 2022. This variation may reflect changes in playing style or game strategies over the years.
Points:
Points scored per game have generally increased, indicating higher scoring games or improved offensive strategies.
Winning Percentage:
The winning percentage has been relatively stable, with a small decrease in 2021 and 2022. This stability suggests that winning percentages have not fluctuated drastically despite changes in other metrics.
# Mean winning percentage of each team over all seasons, best team first.
arrange(
  summarise(
    group_by(data, Team),
    AverageWinningPercentage = mean(WinningPercentage, na.rm = TRUE)
  ),
  desc(AverageWinningPercentage)
)
## # A tibble: 30 × 2
## Team AverageWinningPercentage
## <fct> <dbl>
## 1 Milwaukee Bucks 0.684
## 2 Philadelphia 76ers 0.638
## 3 Denver Nuggets 0.629
## 4 Phoenix Suns 0.626
## 5 Boston Celtics 0.621
## 6 Los Angeles Clippers 0.596
## 7 Utah Jazz 0.596
## 8 Miami Heat 0.586
## 9 Memphis Grizzlies 0.575
## 10 Dallas Mavericks 0.563
## # ℹ 20 more rows
We can see that the Milwaukee Bucks have the highest average winning percentage from 2019 to 2022 (0.684). Meanwhile, the Detroit Pistons registered the lowest average winning percentage, 0.27. The other teams have average winning percentages of roughly 0.4, 0.5 or 0.6.
# Distribution of points per season, one box per year.
boxplot(
  Points ~ Year,
  data = data,
  main = "Points by Year",
  xlab = "Year",
  ylab = "Points"
)
# Same view for 3-point percentage.
boxplot(
  `3PointPercentage` ~ Year,
  data = data,
  main = "3-Point Percentage by Year",
  xlab = "Year",
  ylab = "3-Point Percentage"
)
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.3.3
# Correlation matrix of all numeric columns. vapply() always yields a logical
# selection vector, whereas the return type of sapply() depends on its input.
cor_matrix <- cor(data[, vapply(data, is.numeric, logical(1))])
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.92 loaded
# Draw the upper triangle of the correlation matrix as coloured cells with
# the coefficients printed on top; all text is shrunk to fit.
corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  diag = FALSE,            # leave out the diagonal
  addCoef.col = "black",   # print the coefficients in black
  number.cex = 0.5,        # coefficient text size
  tl.col = "black",        # variable label colour
  tl.cex = 0.5,            # variable label size
  cl.cex = 0.5             # colour legend text size
)
library(ggplot2)
library(reshape2)
# Long format: one row per team, season and metric.
data_melted <- melt(data, id.vars = c("Team", "Year"))
# Average over teams so that every Year x Metric cell holds exactly one value.
# Plotting the raw rows would stack one tile per team in each cell and show
# only the tile that happens to be drawn last.
data_melted_by_year <- data_melted %>%
  group_by(Year, variable) %>%
  summarise(value = mean(value, na.rm = TRUE), .groups = "drop")
ggplot(data_melted_by_year, aes(x = Year, y = variable, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white",
                       midpoint = median(data_melted$value, na.rm = TRUE)) +
  labs(title = "Heatmap of Metrics by Year", x = "Year", y = "Metric")
library(ggplot2)
# One line per team showing points across seasons; x-axis labels are tilted
# by 45 degrees.
ggplot(data, aes(x = Year, y = Points, group = Team, color = Team)) +
  geom_line() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Points Trend Over Time", x = "Year", y = "Points")
library(stargazer)
library(car) # For VIF function
## Warning: package 'car' was built under R version 4.3.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.3
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Multicollinearity check: regress winning percentage on the three 3-point
# measures, the control variables and Team, then report the variance
# inflation factors of that model.
vif_model <- lm(
  WinningPercentage ~ `3PointPercentage` + `3PointAttempted` + `3Point` +
    Games + FieldGoal + Pace + Team,
  data = data
)
vif(vif_model)
## GVIF Df GVIF^(1/(2*Df))
## `3PointPercentage` 109.451084 1 10.461887
## `3PointAttempted` 582.655811 1 24.138264
## `3Point` 772.263898 1 27.789637
## Games 1.676959 1 1.294975
## FieldGoal 4.194103 1 2.047951
## Pace 3.669094 1 1.915488
## Team 19.353422 29 1.052411
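Based on the generalized Variance Inflation Factors (GVIF),
Adjusted GVIF values, GVIF^(1/(2*Df)):
3PointPercentage: 10.46, 3PointAttempted: 24.14, 3Point: 27.79 (the corresponding raw GVIF values are 109.45, 582.66 and 772.26). All three are far above the values of the other predictors, which range from 1.05 to 2.05.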
Choose a Key Independent Variable:
Given that 3Point, 3PointAttempted, and 3PointPercentage exhibit high multicollinearity with each other, only one of them should be included in the models. Based on the VIF values and relevance, 3PointPercentage is a suitable choice for inclusion: it has the lowest value of the three, is commonly used to measure shooting efficiency, and is relevant for this analysis.
Model Specification:
Proceed with models using only 3PointPercentage as the key independent variable.
Model 1: Impact on Winning Percentage
# Four nested specifications for winning percentage.
# (1) key regressor only
model1_wp <- lm(
  WinningPercentage ~ `3PointPercentage`,
  data = data
)
# (2) adds the control variables
model2_wp <- lm(
  WinningPercentage ~ `3PointPercentage` + Games + FieldGoal + Pace,
  data = data
)
# (3) adds team dummies
model3_wp <- lm(
  WinningPercentage ~ `3PointPercentage` + Games + FieldGoal + Pace +
    factor(Team),
  data = data
)
# (4) adds team and year dummies
model4_wp <- lm(
  WinningPercentage ~ `3PointPercentage` + Games + FieldGoal + Pace +
    factor(Team) + factor(Year),
  data = data
)
# Print the four models side by side as a text table and save the same table
# to a file.
stargazer(
  model1_wp, model2_wp, model3_wp, model4_wp,
  title = "Regression Results for Winning Percentage",
  dep.var.labels = c("Winning Percentage"),
  column.labels = c("Model 1", "Model 2", "Model 3", "Model 4"),
  type = 'text',
  align = TRUE,
  no.space = TRUE,
  column.sep.width = "0.5pt",
  # omit.stat = c("f", "ser") is left disabled, so all statistics are shown
  out = "model_summary_wp.txt"
)
##
## Regression Results for Winning Percentage
## ================================================================================================================================
## Dependent variable:
## ---------------------------------------------------------------------------------------------
## Winning Percentage
## Model 1 Model 2 Model 3 Model 4
## (1) (2) (3) (4)
## --------------------------------------------------------------------------------------------------------------------------------
## `3PointPercentage` 5.181*** 3.678*** 2.736*** 2.337***
## (0.626) (0.683) (0.750) (0.713)
## Games 0.001 0.0001 0.015***
## (0.002) (0.002) (0.006)
## FieldGoal 0.036*** 0.033*** 0.045***
## (0.008) (0.010) (0.010)
## Pace -0.016*** -0.014** -0.023***
## (0.006) (0.007) (0.007)
## factor(Team)Boston Celtics 0.114* 0.091
## (0.063) (0.058)
## factor(Team)Brooklyn Nets 0.059 0.044
## (0.063) (0.057)
## factor(Team)Charlotte Hornets -0.010 0.006
## (0.063) (0.058)
## factor(Team)Chicago Bulls -0.030 -0.024
## (0.063) (0.057)
## factor(Team)Cleveland Cavaliers 0.002 0.004
## (0.065) (0.059)
## factor(Team)Dallas Mavericks 0.085 0.050
## (0.064) (0.060)
## factor(Team)Denver Nuggets 0.074 0.025
## (0.065) (0.060)
## factor(Team)Detroit Pistons -0.107 -0.085
## (0.066) (0.061)
## factor(Team)Golden State Warriors 0.043 0.073
## (0.065) (0.059)
## factor(Team)Houston Rockets -0.002 0.008
## (0.066) (0.061)
## factor(Team)Indiana Pacers -0.031 -0.055
## (0.062) (0.057)
## factor(Team)Los Angeles Clippers 0.058 0.048
## (0.066) (0.060)
## factor(Team)Los Angeles Lakers 0.120* 0.106*
## (0.063) (0.057)
## factor(Team)Memphis Grizzlies 0.098 0.070
## (0.064) (0.058)
## factor(Team)Miami Heat 0.133* 0.117*
## (0.067) (0.063)
## factor(Team)Milwaukee Bucks 0.169** 0.155***
## (0.064) (0.058)
## factor(Team)Minnesota Timberwolves 0.009 0.042
## (0.064) (0.058)
## factor(Team)New Orleans Pelicans -0.006 -0.023
## (0.062) (0.057)
## factor(Team)New York Knicks 0.027 0.032
## (0.066) (0.060)
## factor(Team)Oklahoma City Thunder 0.053 0.051
## (0.065) (0.060)
## factor(Team)Orlando Magic 0.010 0.007
## (0.066) (0.062)
## factor(Team)Philadelphia 76ers 0.136** 0.117*
## (0.065) (0.059)
## factor(Team)Phoenix Suns 0.092 0.060
## (0.063) (0.057)
## factor(Team)Portland Trail Blazers -0.024 -0.038
## (0.065) (0.059)
## factor(Team)Sacramento Kings -0.027 -0.045
## (0.062) (0.057)
## factor(Team)San Antonio Spurs -0.086 -0.106*
## (0.063) (0.057)
## factor(Team)Toronto Raptors 0.101 0.084
## (0.063) (0.058)
## factor(Team)Utah Jazz 0.098 0.083
## (0.064) (0.058)
## factor(Team)Washington Wizards -0.032 -0.044
## (0.063) (0.057)
## factor(Year)2020 -0.079***
## (0.023)
## factor(Year)2021 -0.197***
## (0.070)
## factor(Year)2022 -0.251***
## (0.066)
## Constant -1.363*** -0.731 -0.473 -0.950
## (0.225) (0.636) (0.731) (0.700)
## --------------------------------------------------------------------------------------------------------------------------------
## Observations 120 120 120 120
## R2 0.367 0.483 0.705 0.768
## Adjusted R2 0.362 0.465 0.592 0.667
## Residual Std. Error 0.110 (df = 118) 0.101 (df = 115) 0.088 (df = 86) 0.080 (df = 83)
## F Statistic 68.497*** (df = 1; 118) 26.900*** (df = 4; 115) 6.225*** (df = 33; 86) 7.635*** (df = 36; 83)
## ================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
The regression results above give a detailed view of how the inclusion of additional variables impacts the estimated relationship between 3PointPercentage and WinningPercentage.
Model 1 (Simple Relationship)
Coefficient of 3PointPercentage: 5.181 (p < 0.01)
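Interpretation: In the simplest model, a one-unit increase in 3PointPercentage is associated with a 5.181 increase in WinningPercentage. Because both variables are recorded as proportions (for example 0.355), a more practical reading is that a 0.01 increase in 3PointPercentage (one percentage point) is associated with an increase of about 0.052 in WinningPercentage (roughly 5.2 percentage points). This indicates a strong positive relationship between the proportion of 3-point shots made and winning percentage.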
Model 2 (Including Controls)
Coefficient of 3PointPercentage: 3.678 (p < 0.01)
Interpretation: When controlling for other factors like Games, FieldGoal, and Pace, the impact of 3PointPercentage on winning percentage decreases but remains positive and significant. This suggests that while 3PointPercentage is important, its effect is somewhat influenced by other aspects of team performance.
Model 3 (Including Team Dummies)
Coefficient of 3PointPercentage: 2.736 (p < 0.01)
Interpretation: Adding team-specific effects reduces the coefficient further. This decrease reflects that part of the impact of 3PointPercentage on winning percentage can be attributed to differences between teams.
Model 4 (Including Year Dummies)
Coefficient of 3PointPercentage: 2.337 (p < 0.01)
Interpretation: Including both team and year dummies further reduces the coefficient. This suggests that the effect of 3PointPercentage is also influenced by temporal factors affecting team performance across different seasons.
Magnitude of Impact: The coefficient of 3PointPercentage decreases from 5.181 in Model 1 to 2.337 in Model 4. This decrease occurs as we account for additional variables, indicating that while 3PointPercentage has a substantial positive association with winning percentage, other factors (such as team quality and year-specific effects) also play a role.
Effect After Controls: Even in the most comprehensive model (Model 4), which controls for team and year effects, the coefficient remains significant and positive. This suggests that higher 3-point shooting accuracy (the share of 3-point attempts that are made) continues to be associated with a higher winning percentage, although the effect is somewhat moderated when other factors are considered.
Practical Implications: The results imply that improving 3-point shooting accuracy is associated with a higher winning percentage, but the magnitude of this effect is influenced by additional variables. Note that the models use 3PointPercentage, not the number of 3-point shots, so they say nothing directly about shot volume. Teams should consider the benefits of a strong 3-point shooting game while also accounting for other aspects such as overall team strategy and seasonal variations.
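Contextual Factors: The significant year dummies indicate that, holding the other regressors fixed, the average winning percentage differs across seasons. Year dummies shift the intercept rather than the slope, so on their own they do not show that the impact of 3PointPercentage varies across seasons; testing that would require interacting 3PointPercentage with Year. The season differences could be due to changes in game dynamics, rule changes, or evolving strategies in the league.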
Model 2: Impact on Total Points Scored
# Four nested specifications for points scored.
# Model 1: y ~ key x
model1_pts <- lm(Points ~ `3PointPercentage`, data = data)
# Model 2: y ~ key x + controls
model2_pts <- lm(Points ~ `3PointPercentage` + Games + FieldGoal + Pace,
                 data = data)
# Model 3: y ~ key x + controls + team dummies
model3_pts <- lm(Points ~ `3PointPercentage` + Games + FieldGoal + Pace +
                   factor(Team), data = data)
# Model 4: y ~ key x + controls + team dummies + year dummies
model4_pts <- lm(Points ~ `3PointPercentage` + Games + FieldGoal + Pace +
                   factor(Team) + factor(Year), data = data)
# Print the table and save it. The file gets its own "_pts" name (following
# "model_summary_wp.txt") because later stargazer() calls in this script also
# write to "model_summary.txt" and would overwrite this table.
stargazer(model1_pts, model2_pts, model3_pts, model4_pts,
          align = TRUE,
          type = 'text',
          dep.var.labels = c("Total Points"),
          column.labels = c("Model 1", "Model 2", "Model 3", "Model 4"),
          title = "Regression for Total Points",
          no.space = TRUE,
          out = "model_summary_pts.txt")
##
## Regression for Total Points
## ===================================================================================================================================
## Dependent variable:
## ------------------------------------------------------------------------------------------------
## Total Points
## Model 1 Model 2 Model 3 Model 4
## (1) (2) (3) (4)
## -----------------------------------------------------------------------------------------------------------------------------------
## `3PointPercentage` 115.372*** 81.892*** 62.416*** 65.209***
## (19.252) (11.136) (12.226) (12.546)
## Games 0.131*** 0.103*** 0.056
## (0.030) (0.029) (0.097)
## FieldGoal 1.403*** 1.732*** 1.613***
## (0.131) (0.162) (0.171)
## Pace 0.587*** 0.543*** 0.512***
## (0.098) (0.115) (0.121)
## factor(Team)Boston Celtics 0.420 0.351
## (1.031) (1.014)
## factor(Team)Brooklyn Nets -0.734 -0.725
## (1.026) (1.003)
## factor(Team)Charlotte Hornets -1.232 -1.461
## (1.034) (1.012)
## factor(Team)Chicago Bulls -2.973*** -3.080***
## (1.020) (0.995)
## factor(Team)Cleveland Cavaliers -1.658 -1.969*
## (1.059) (1.043)
## factor(Team)Dallas Mavericks 1.714 1.554
## (1.050) (1.060)
## factor(Team)Denver Nuggets -1.576 -1.506
## (1.055) (1.048)
## factor(Team)Detroit Pistons -0.727 -1.108
## (1.081) (1.072)
## factor(Team)Golden State Warriors -1.088 -1.222
## (1.052) (1.031)
## factor(Team)Houston Rockets 0.685 0.621
## (1.083) (1.079)
## factor(Team)Indiana Pacers -2.218** -2.108**
## (1.017) (0.999)
## factor(Team)Los Angeles Clippers -1.262 -1.398
## (1.080) (1.057)
## factor(Team)Los Angeles Lakers -1.169 -1.085
## (1.021) (1.002)
## factor(Team)Memphis Grizzlies -2.540** -2.259**
## (1.037) (1.020)
## factor(Team)Miami Heat 0.805 0.466
## (1.093) (1.100)
## factor(Team)Milwaukee Bucks -0.869 -0.614
## (1.051) (1.028)
## factor(Team)Minnesota Timberwolves 0.027 0.013
## (1.044) (1.026)
## factor(Team)New Orleans Pelicans -0.836 -0.761
## (1.017) (0.999)
## factor(Team)New York Knicks -0.525 -0.882
## (1.068) (1.053)
## factor(Team)Oklahoma City Thunder -1.401 -1.515
## (1.051) (1.049)
## factor(Team)Orlando Magic -1.561 -1.797
## (1.080) (1.097)
## factor(Team)Philadelphia 76ers -0.123 -0.295
## (1.059) (1.046)
## factor(Team)Phoenix Suns -1.808* -1.703*
## (1.025) (1.007)
## factor(Team)Portland Trail Blazers -0.022 -0.140
## (1.052) (1.040)
## factor(Team)Sacramento Kings -1.062 -1.007
## (1.017) (0.996)
## factor(Team)San Antonio Spurs -3.090*** -2.935***
## (1.021) (0.999)
## factor(Team)Toronto Raptors 0.111 -0.023
## (1.032) (1.024)
## factor(Team)Utah Jazz 1.124 1.026
## (1.039) (1.020)
## factor(Team)Washington Wizards -1.563 -1.470
## (1.022) (1.002)
## factor(Year)2020 -0.264
## (0.413)
## factor(Year)2021 -0.036
## (1.224)
## factor(Year)2022 0.958
## (1.168)
## Constant 70.804*** -43.203*** -42.400*** -31.910**
## (6.925) (10.368) (11.909) (12.320)
## -----------------------------------------------------------------------------------------------------------------------------------
## Observations 120 120 120 120
## R2 0.233 0.824 0.900 0.908
## Adjusted R2 0.227 0.818 0.861 0.868
## Residual Std. Error 3.388 (df = 118) 1.643 (df = 115) 1.436 (df = 86) 1.399 (df = 83)
## F Statistic 35.914*** (df = 1; 118) 134.838*** (df = 4; 115) 23.355*** (df = 33; 86) 22.759*** (df = 36; 83)
## ===================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Model 3: Impact on Field-Goal Percentage
# Four nested specifications for field goal percentage.
# Model 1: y ~ key x
model1_fg <- lm(FieldGoalsPercentage ~ `3PointPercentage`, data = data)
# Model 2: y ~ key x + controls
model2_fg <- lm(FieldGoalsPercentage ~ `3PointPercentage` + Games + FieldGoal +
                  Pace, data = data)
# Model 3: y ~ key x + controls + team dummies
model3_fg <- lm(FieldGoalsPercentage ~ `3PointPercentage` + Games + FieldGoal +
                  Pace + factor(Team), data = data)
# Model 4: y ~ key x + controls + team dummies + year dummies
model4_fg <- lm(FieldGoalsPercentage ~ `3PointPercentage` + Games + FieldGoal +
                  Pace + factor(Team) + factor(Year), data = data)
# Print the table and save it. The label and title spell "Field" correctly,
# and the file gets its own "_fg" name (following "model_summary_wp.txt")
# because the combined table further down also writes to "model_summary.txt"
# and would overwrite this one.
stargazer(model1_fg, model2_fg, model3_fg, model4_fg,
          align = TRUE,
          type = 'text',
          dep.var.labels = c("Field Goals Percentage"),
          column.labels = c("Model 1", "Model 2", "Model 3", "Model 4"),
          title = "Regression for Field Goals Percentage",
          no.space = TRUE,
          out = "model_summary_fg.txt")
##
## Regression for Field Goals Percentage
## ==================================================================================================================================
## Dependent variable:
## -----------------------------------------------------------------------------------------------
## Field Goals Percentage
## Model 1 Model 2 Model 3 Model 4
## (1) (2) (3) (4)
## ----------------------------------------------------------------------------------------------------------------------------------
## `3PointPercentage` 0.524*** 0.253*** 0.257*** 0.253***
## (0.070) (0.052) (0.057) (0.058)
## Games 0.0002* 0.0003** 0.0004
## (0.0001) (0.0001) (0.0004)
## FieldGoal 0.007*** 0.007*** 0.006***
## (0.001) (0.001) (0.001)
## Pace -0.002*** -0.002*** -0.002***
## (0.0005) (0.001) (0.001)
## factor(Team)Boston Celtics -0.003 -0.004
## (0.005) (0.005)
## factor(Team)Brooklyn Nets 0.006 0.006
## (0.005) (0.005)
## factor(Team)Charlotte Hornets -0.004 -0.005
## (0.005) (0.005)
## factor(Team)Chicago Bulls 0.006 0.006
## (0.005) (0.005)
## factor(Team)Cleveland Cavaliers 0.008* 0.006
## (0.005) (0.005)
## factor(Team)Dallas Mavericks 0.003 0.001
## (0.005) (0.005)
## factor(Team)Denver Nuggets 0.007 0.007
## (0.005) (0.005)
## factor(Team)Detroit Pistons 0.002 -0.001
## (0.005) (0.005)
## factor(Team)Golden State Warriors 0.003 0.002
## (0.005) (0.005)
## factor(Team)Houston Rockets 0.004 0.003
## (0.005) (0.005)
## factor(Team)Indiana Pacers 0.0002 0.0003
## (0.005) (0.005)
## factor(Team)Los Angeles Clippers -0.001 -0.002
## (0.005) (0.005)
## factor(Team)Los Angeles Lakers 0.011** 0.011**
## (0.005) (0.005)
## factor(Team)Memphis Grizzlies -0.005 -0.004
## (0.005) (0.005)
## factor(Team)Miami Heat 0.009 0.006
## (0.005) (0.005)
## factor(Team)Milwaukee Bucks 0.00004 0.001
## (0.005) (0.005)
## factor(Team)Minnesota Timberwolves -0.0002 -0.0001
## (0.005) (0.005)
## factor(Team)New Orleans Pelicans 0.004 0.004
## (0.005) (0.005)
## factor(Team)New York Knicks -0.006 -0.008
## (0.005) (0.005)
## factor(Team)Oklahoma City Thunder -0.0002 -0.002
## (0.005) (0.005)
## factor(Team)Orlando Magic -0.001 -0.004
## (0.005) (0.005)
## factor(Team)Philadelphia 76ers 0.008 0.006
## (0.005) (0.005)
## factor(Team)Phoenix Suns 0.001 0.001
## (0.005) (0.005)
## factor(Team)Portland Trail Blazers -0.005 -0.006
## (0.005) (0.005)
## factor(Team)Sacramento Kings 0.006 0.006
## (0.005) (0.005)
## factor(Team)San Antonio Spurs -0.005 -0.004
## (0.005) (0.005)
## factor(Team)Toronto Raptors -0.008* -0.010**
## (0.005) (0.005)
## factor(Team)Utah Jazz 0.003 0.002
## (0.005) (0.005)
## factor(Team)Washington Wizards 0.007 0.007
## (0.005) (0.005)
## factor(Year)2020 -0.001
## (0.002)
## factor(Year)2021 -0.005
## (0.006)
## factor(Year)2022 0.001
## (0.005)
## Constant 0.277*** 0.250*** 0.228*** 0.274***
## (0.025) (0.048) (0.056) (0.057)
## ----------------------------------------------------------------------------------------------------------------------------------
## Observations 120 120 120 120
## R2 0.323 0.744 0.851 0.868
## Adjusted R2 0.318 0.735 0.794 0.810
## Residual Std. Error 0.012 (df = 118) 0.008 (df = 115) 0.007 (df = 86) 0.006 (df = 83)
## F Statistic 56.403*** (df = 1; 118) 83.402*** (df = 4; 115) 14.933*** (df = 33; 86) 15.105*** (df = 36; 83)
## ==================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Side-by-side text table of the fullest specification (Model 4) for each of
# the three outcomes; the table is also written to a file.
stargazer(
  model4_wp, model4_pts, model4_fg,
  title = "Regression Model",
  column.labels = c("Model wp", "Model pts", "Model fg"),
  type = 'text',
  align = TRUE,
  no.space = TRUE,
  out = "model_summary.txt"
)
##
## Regression Model
## ===================================================================================
## Dependent variable:
## ------------------------------------------------
## WinningPercentage Points FieldGoalsPercentage
## Model wp Model pts Model fg
## (1) (2) (3)
## -----------------------------------------------------------------------------------
## `3PointPercentage` 2.337*** 65.209*** 0.253***
## (0.713) (12.546) (0.058)
## Games 0.015*** 0.056 0.0004
## (0.006) (0.097) (0.0004)
## FieldGoal 0.045*** 1.613*** 0.006***
## (0.010) (0.171) (0.001)
## Pace -0.023*** 0.512*** -0.002***
## (0.007) (0.121) (0.001)
## factor(Team)Boston Celtics 0.091 0.351 -0.004
## (0.058) (1.014) (0.005)
## factor(Team)Brooklyn Nets 0.044 -0.725 0.006
## (0.057) (1.003) (0.005)
## factor(Team)Charlotte Hornets 0.006 -1.461 -0.005
## (0.058) (1.012) (0.005)
## factor(Team)Chicago Bulls -0.024 -3.080*** 0.006
## (0.057) (0.995) (0.005)
## factor(Team)Cleveland Cavaliers 0.004 -1.969* 0.006
## (0.059) (1.043) (0.005)
## factor(Team)Dallas Mavericks 0.050 1.554 0.001
## (0.060) (1.060) (0.005)
## factor(Team)Denver Nuggets 0.025 -1.506 0.007
## (0.060) (1.048) (0.005)
## factor(Team)Detroit Pistons -0.085 -1.108 -0.001
## (0.061) (1.072) (0.005)
## factor(Team)Golden State Warriors 0.073 -1.222 0.002
## (0.059) (1.031) (0.005)
## factor(Team)Houston Rockets 0.008 0.621 0.003
## (0.061) (1.079) (0.005)
## factor(Team)Indiana Pacers -0.055 -2.108** 0.0003
## (0.057) (0.999) (0.005)
## factor(Team)Los Angeles Clippers 0.048 -1.398 -0.002
## (0.060) (1.057) (0.005)
## factor(Team)Los Angeles Lakers 0.106* -1.085 0.011**
## (0.057) (1.002) (0.005)
## factor(Team)Memphis Grizzlies 0.070 -2.259** -0.004
## (0.058) (1.020) (0.005)
## factor(Team)Miami Heat 0.117* 0.466 0.006
## (0.063) (1.100) (0.005)
## factor(Team)Milwaukee Bucks 0.155*** -0.614 0.001
## (0.058) (1.028) (0.005)
## factor(Team)Minnesota Timberwolves 0.042 0.013 -0.0001
## (0.058) (1.026) (0.005)
## factor(Team)New Orleans Pelicans -0.023 -0.761 0.004
## (0.057) (0.999) (0.005)
## factor(Team)New York Knicks 0.032 -0.882 -0.008
## (0.060) (1.053) (0.005)
## factor(Team)Oklahoma City Thunder 0.051 -1.515 -0.002
## (0.060) (1.049) (0.005)
## factor(Team)Orlando Magic 0.007 -1.797 -0.004
## (0.062) (1.097) (0.005)
## factor(Team)Philadelphia 76ers 0.117* -0.295 0.006
## (0.059) (1.046) (0.005)
## factor(Team)Phoenix Suns 0.060 -1.703* 0.001
## (0.057) (1.007) (0.005)
## factor(Team)Portland Trail Blazers -0.038 -0.140 -0.006
## (0.059) (1.040) (0.005)
## factor(Team)Sacramento Kings -0.045 -1.007 0.006
## (0.057) (0.996) (0.005)
## factor(Team)San Antonio Spurs -0.106* -2.935*** -0.004
## (0.057) (0.999) (0.005)
## factor(Team)Toronto Raptors 0.084 -0.023 -0.010**
## (0.058) (1.024) (0.005)
## factor(Team)Utah Jazz 0.083 1.026 0.002
## (0.058) (1.020) (0.005)
## factor(Team)Washington Wizards -0.044 -1.470 0.007
## (0.057) (1.002) (0.005)
## factor(Year)2020 -0.079*** -0.264 -0.001
## (0.023) (0.413) (0.002)
## factor(Year)2021 -0.197*** -0.036 -0.005
## (0.070) (1.224) (0.006)
## factor(Year)2022 -0.251*** 0.958 0.001
## (0.066) (1.168) (0.005)
## Constant -0.950 -31.910** 0.274***
## (0.700) (12.320) (0.057)
## -----------------------------------------------------------------------------------
## Observations 120 120 120
## R2 0.768 0.908 0.868
## Adjusted R2 0.667 0.868 0.810
## Residual Std. Error (df = 83) 0.080 1.399 0.006
## F Statistic (df = 36; 83) 7.635*** 22.759*** 15.105***
## ===================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Information criteria for the four specifications of each outcome.
# All twelve model objects must already exist in the session.
aic_values_wp <- vapply(
  list(model1_wp, model2_wp, model3_wp, model4_wp), AIC, numeric(1)
)
bic_values_wp <- vapply(
  list(model1_wp, model2_wp, model3_wp, model4_wp), BIC, numeric(1)
)
aic_values_points <- vapply(
  list(model1_pts, model2_pts, model3_pts, model4_pts), AIC, numeric(1)
)
bic_values_points <- vapply(
  list(model1_pts, model2_pts, model3_pts, model4_pts), BIC, numeric(1)
)
aic_values_fg <- vapply(
  list(model1_fg, model2_fg, model3_fg, model4_fg), AIC, numeric(1)
)
bic_values_fg <- vapply(
  list(model1_fg, model2_fg, model3_fg, model4_fg), BIC, numeric(1)
)
# One row per specification, one AIC/BIC column pair per outcome
print(
  data.frame(
    Model = paste("Model", 1:4),
    AIC_WP = aic_values_wp,
    BIC_WP = bic_values_wp,
    AIC_Points = aic_values_points,
    BIC_Points = bic_values_points,
    AIC_FG = aic_values_fg,
    BIC_FG = bic_values_fg
  )
)
## Model AIC_WP BIC_WP AIC_Points BIC_Points AIC_FG BIC_FG
## 1 Model 1 -184.8636 -176.5011 637.3680 645.7305 -711.2876 -702.9252
## 2 Model 2 -203.1888 -186.4638 466.6080 483.3329 -821.7513 -805.0263
## 3 Model 3 -212.3853 -114.8231 457.4017 554.9640 -829.1958 -731.6336
## 4 Model 4 -235.2935 -129.3688 452.9184 558.8431 -837.0166 -731.0919
1. Winning Percentage (WP)
Model 1: AIC = -184.86, BIC = -176.50; Model 2: AIC = -203.19, BIC = -186.46; Model 3: AIC = -212.39, BIC = -114.82; Model 4: AIC = -235.29, BIC = -129.37.
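Best Model: Model 4 has the lowest AIC (-235.29), indicating the best fit for predicting Winning Percentage by that criterion. It does not have the lowest BIC: Model 2 does (-186.46 versus -129.37 for Model 4), because BIC penalizes the additional Team and Year parameters more heavily. Model 4 includes 3-Point Percentage, Games, Field Goal, Pace, and both Team and Year factors.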
2. Total Points (Points)
Model 1: AIC = 637.37, BIC = 645.73; Model 2: AIC = 466.61, BIC = 483.33; Model 3: AIC = 457.40, BIC = 554.96; Model 4: AIC = 452.92, BIC = 558.84.
Best Model: Model 4 has the lowest AIC (452.92) and is very close in BIC to Model 3 (558.84 versus 554.96), although Model 2 has the lowest BIC overall (483.33). On the AIC criterion, Model 4 is the preferred model for predicting Total Points, incorporating 3-Point Percentage, Games, Field Goal, Pace, and both Team and Year factors.
3. Field Goals Percentage (FG%)
Model 1: AIC = -711.29, BIC = -702.93; Model 2: AIC = -821.75, BIC = -805.03; Model 3: AIC = -829.20, BIC = -731.63; Model 4: AIC = -837.02, BIC = -731.09.
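Best Model: Model 4 has the lowest AIC (-837.02), and its BIC is close to that of Model 3 (-731.09 versus -731.63), although Model 2 has the lowest BIC overall (-805.03). On the AIC criterion, Model 4 is also the best model for Field Goals Percentage; it includes the same predictors as the Model 4 fits for the other outcomes.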
Overall Best Models:
Winning Percentage: Model 4; Total Points: Model 4; Field Goals Percentage: Model 4.
Rationale: Model 4 consistently shows the lowest AIC across all outcome variables, whereas BIC is lowest for the more parsimonious Model 2 in each case. The choice of Model 4 therefore rests on AIC: by that criterion it provides the best balance between fit and complexity, incorporating all relevant predictors, including 3-Point Percentage, Games, Field Goals, Pace, and both Team and Year effects.
# Load necessary libraries
library(ggplot2)
library(car) # For Variance Inflation Factor (VIF)
Residuals vs. Fitted Values Plot
This plot helps verify if the relationship between predictors and the outcome is linear.
# Residuals against fitted values for each Model 4 fit (WP, Points, FG),
# with a red horizontal reference line at zero. One panel per model,
# drawn in the order WP, Points, FG.
local({
  model4_fits <- list(
    "Residuals vs Fitted Values (Model 4 WP)" = model4_wp,
    "Residuals vs Fitted Values (Model 4 Points)" = model4_pts,
    "Residuals vs Fitted Values (Model 4 FG)" = model4_fg
  )
  for (panel_title in names(model4_fits)) {
    fit <- model4_fits[[panel_title]]
    plot(
      fit$fitted.values, fit$residuals,
      xlab = "Fitted Values",
      ylab = "Residuals",
      main = panel_title
    )
    abline(h = 0, col = "red")
  }
})
Residuals vs. Fitted Values (Absolute Residuals) Plot
# Absolute residuals against fitted values for each Model 4 fit
# (WP, Points, FG). The red reference line marks the mean absolute residual
# of the model being plotted. It is computed per model inside the loop so
# that the Points and FG panels no longer reuse the winning-percentage mean.
local({
  model4_fits <- list(WP = model4_wp, Points = model4_pts, FG = model4_fg)
  for (outcome in names(model4_fits)) {
    fit <- model4_fits[[outcome]]
    abs_resid <- abs(fit$residuals)
    plot(
      fit$fitted.values, abs_resid,
      xlab = "Fitted Values",
      ylab = "Absolute Residuals",
      main = paste0(
        "Absolute Residuals vs Fitted Values (Model 4 ", outcome, ")"
      )
    )
    abline(h = mean(abs_resid), col = "red")
  }
})
Q-Q Plot
# Normal Q-Q plot of the residuals of each Model 4 fit (WP, PTS, FG),
# each with a red reference line from qqline().
local({
  model4_fits <- list(
    "Q-Q Plot (Model 4 WP)" = model4_wp,
    "Q-Q Plot (Model 4 PTS)" = model4_pts,
    "Q-Q Plot (Model 4 FG)" = model4_fg
  )
  for (panel_title in names(model4_fits)) {
    model_resid <- model4_fits[[panel_title]]$residuals
    qqnorm(model_resid, main = panel_title)
    qqline(model_resid, col = "red")
  }
})
# Shapiro-Wilk normality test on the residuals of the Model 4
# winning-percentage fit. The null hypothesis is that the residuals are
# normally distributed, so a small p-value signals a departure from normality.
# The argument expression is echoed in the test's "data:" output line.
shapiro.test(model4_wp$residuals)
##
## Shapiro-Wilk normality test
##
## data: model4_wp$residuals
## W = 0.9924, p-value = 0.7582
# Shapiro-Wilk normality test on the residuals of the Model 4 points fit.
# The null hypothesis is that the residuals are normally distributed, so a
# small p-value signals a departure from normality.
# The argument expression is echoed in the test's "data:" output line.
shapiro.test(model4_pts$residuals)
##
## Shapiro-Wilk normality test
##
## data: model4_pts$residuals
## W = 0.99405, p-value = 0.8942
# Shapiro-Wilk normality test on the residuals of the Model 4
# field-goal-percentage fit. The null hypothesis is that the residuals are
# normally distributed, so a small p-value signals a departure from normality.
# The argument expression is echoed in the test's "data:" output line.
shapiro.test(model4_fg$residuals)
##
## Shapiro-Wilk normality test
##
## data: model4_fg$residuals
## W = 0.99172, p-value = 0.6946
# Multicollinearity check for the Model 4 winning-percentage fit.
# The printed table reports GVIF, Df and GVIF^(1/(2*Df)) for each term.
car::vif(model4_wp)
## GVIF Df GVIF^(1/(2*Df))
## `3PointPercentage` 2.489760 1 1.577897
## Games 18.112269 1 4.255851
## FieldGoal 4.329421 1 2.080726
## Pace 3.620380 1 1.902730
## factor(Team) 11.584348 29 1.043140
## factor(Year) 27.631588 3 1.738739
# Multicollinearity check for the Model 4 points fit (GVIF per term)
car::vif(model4_pts)
## GVIF Df GVIF^(1/(2*Df))
## `3PointPercentage` 2.489760 1 1.577897
## Games 18.112269 1 4.255851
## FieldGoal 4.329421 1 2.080726
## Pace 3.620380 1 1.902730
## factor(Team) 11.584348 29 1.043140
## factor(Year) 27.631588 3 1.738739
# Multicollinearity check for the Model 4 field-goal-percentage fit
# (GVIF per term)
car::vif(model4_fg)
## GVIF Df GVIF^(1/(2*Df))
## `3PointPercentage` 2.489760 1 1.577897
## Games 18.112269 1 4.255851
## FieldGoal 4.329421 1 2.080726
## Pace 3.620380 1 1.902730
## factor(Team) 11.584348 29 1.043140
## factor(Year) 27.631588 3 1.738739
# Predictions from the three Model 4 fits (Team and Year entered as factors).
# predict() on an lm object takes the prediction frame through `newdata`.
# An argument named `data` is absorbed by `...` and ignored, so the frame is
# passed as `newdata` here. This yields one prediction per row of `data`,
# which keeps the predictions aligned with the Actual columns built below.
predictions_fe_winning <- predict(model4_wp, newdata = data)
predictions_fe_total_points <- predict(model4_pts, newdata = data)
predictions_fe_field_goal_percentage <- predict(model4_fg, newdata = data)
# Actual vs predicted, rounded for display: three decimals for the two
# proportions, whole numbers for points.
comparison_winning_percentage <- round(
  data.frame(
    Actual = data$WinningPercentage,
    Predicted = predictions_fe_winning
  ),
  3
)
comparison_total_points <- round(
  data.frame(
    Actual = data$Points,
    Predicted = predictions_fe_total_points
  )
)
comparison_field_goal_percentage <- round(
  data.frame(
    Actual = data$FieldGoalsPercentage,
    Predicted = predictions_fe_field_goal_percentage
  ),
  3
)
comparison_total_points
## Actual Predicted
## 1 112 112
## 2 114 114
## 3 112 111
## 4 103 102
## 5 107 106
## 6 107 108
## 7 117 116
## 8 111 112
## 9 107 108
## 10 106 106
## 11 118 114
## 12 109 112
## 13 116 114
## 14 113 114
## 15 113 114
## 16 112 111
## 17 119 118
## 18 113 112
## 19 116 117
## 20 106 108
## 21 110 109
## 22 107 107
## 23 111 113
## 24 114 112
## 25 115 116
## 26 110 111
## 27 114 113
## 28 113 113
## 29 111 113
## 30 114 114
## 31 114 112
## 32 113 114
## 33 119 117
## 34 110 109
## 35 111 112
## 36 104 104
## 37 112 113
## 38 115 114
## 39 107 106
## 40 114 114
## 41 109 110
## 42 115 115
## 43 114 114
## 44 110 110
## 45 113 113
## 46 108 108
## 47 120 121
## 48 112 112
## 49 115 114
## 50 107 109
## 51 105 107
## 52 104 105
## 53 114 114
## 54 115 114
## 55 116 114
## 56 114 114
## 57 111 110
## 58 111 111
## 59 116 115
## 60 117 116
## 61 114 114
## 62 112 111
## 63 113 114
## 64 115 115
## 65 112 111
## 66 108 107
## 67 108 109
## 68 113 111
## 69 105 105
## 70 111 111
## 71 110 111
## 72 112 110
## 73 108 110
## 74 112 113
## 75 116 115
## 76 110 111
## 77 116 114
## 78 116 115
## 79 109 108
## 80 106 105
## 81 104 104
## 82 104 105
## 83 110 109
## 84 115 116
## 85 106 107
## 86 110 110
## 87 113 114
## 88 109 110
## 89 114 112
## 90 109 109
## 91 118 120
## 92 118 117
## 93 113 115
## 94 111 112
## 95 113 113
## 96 112 112
## 97 114 113
## 98 116 117
## 99 110 110
## 100 119 119
## 101 111 112
## 102 116 115
## 103 114 114
## 104 117 116
## 105 117 117
## 106 110 109
## 107 117 117
## 108 116 118
## 109 114 115
## 110 116 113
## 111 118 117
## 112 111 111
## 113 115 114
## 114 114 115
## 115 113 113
## 116 121 118
## 117 113 115
## 118 113 113
## 119 117 118
## 120 113 114
# Show actual vs predicted winning percentage (rounded to 3 decimals)
print(comparison_winning_percentage)
## Actual Predicted
## 1 0.299 0.281
## 2 0.667 0.633
## 3 0.486 0.453
## 4 0.354 0.320
## 5 0.338 0.294
## 6 0.292 0.384
## 7 0.573 0.667
## 8 0.630 0.658
## 9 0.303 0.328
## 10 0.231 0.299
## 11 0.611 0.386
## 12 0.616 0.549
## 13 0.681 0.574
## 14 0.732 0.610
## 15 0.466 0.565
## 16 0.603 0.656
## 17 0.767 0.652
## 18 0.297 0.267
## 19 0.417 0.494
## 20 0.318 0.382
## 21 0.611 0.530
## 22 0.452 0.447
## 23 0.589 0.687
## 24 0.466 0.557
## 25 0.473 0.577
## 26 0.431 0.493
## 27 0.451 0.466
## 28 0.736 0.568
## 29 0.611 0.630
## 30 0.347 0.442
## 31 0.569 0.506
## 32 0.500 0.614
## 33 0.667 0.653
## 34 0.458 0.446
## 35 0.431 0.505
## 36 0.306 0.332
## 37 0.583 0.550
## 38 0.653 0.664
## 39 0.278 0.268
## 40 0.542 0.501
## 41 0.236 0.278
## 42 0.472 0.449
## 43 0.653 0.704
## 44 0.583 0.533
## 45 0.528 0.561
## 46 0.556 0.539
## 47 0.639 0.767
## 48 0.319 0.394
## 49 0.431 0.443
## 50 0.569 0.558
## 51 0.306 0.309
## 52 0.292 0.305
## 53 0.681 0.608
## 54 0.708 0.698
## 55 0.583 0.500
## 56 0.431 0.465
## 57 0.458 0.365
## 58 0.375 0.492
## 59 0.722 0.628
## 60 0.472 0.367
## 61 0.524 0.571
## 62 0.622 0.609
## 63 0.537 0.577
## 64 0.524 0.561
## 65 0.561 0.530
## 66 0.537 0.487
## 67 0.634 0.519
## 68 0.585 0.554
## 69 0.280 0.210
## 70 0.646 0.559
## 71 0.244 0.352
## 72 0.305 0.434
## 73 0.512 0.549
## 74 0.402 0.562
## 75 0.683 0.621
## 76 0.646 0.656
## 77 0.622 0.670
## 78 0.561 0.505
## 79 0.439 0.403
## 80 0.451 0.434
## 81 0.293 0.341
## 82 0.268 0.299
## 83 0.622 0.605
## 84 0.780 0.657
## 85 0.329 0.319
## 86 0.366 0.362
## 87 0.415 0.436
## 88 0.585 0.595
## 89 0.598 0.595
## 90 0.427 0.428
## 91 0.500 0.534
## 92 0.695 0.627
## 93 0.549 0.556
## 94 0.329 0.339
## 95 0.488 0.488
## 96 0.622 0.555
## 97 0.463 0.517
## 98 0.646 0.638
## 99 0.207 0.262
## 100 0.537 0.596
## 101 0.268 0.343
## 102 0.427 0.388
## 103 0.537 0.556
## 104 0.524 0.536
## 105 0.622 0.552
## 106 0.537 0.492
## 107 0.707 0.646
## 108 0.512 0.523
## 109 0.512 0.460
## 110 0.573 0.537
## 111 0.488 0.518
## 112 0.415 0.376
## 113 0.659 0.651
## 114 0.549 0.591
## 115 0.402 0.391
## 116 0.585 0.493
## 117 0.268 0.324
## 118 0.500 0.541
## 119 0.451 0.530
## 120 0.427 0.436
# Show actual vs predicted field-goal percentage (rounded to 3 decimals)
print(comparison_field_goal_percentage)
## Actual Predicted
## 1 0.449 0.445
## 2 0.461 0.462
## 3 0.448 0.457
## 4 0.434 0.437
## 5 0.447 0.454
## 6 0.458 0.461
## 7 0.461 0.471
## 8 0.473 0.480
## 9 0.459 0.454
## 10 0.438 0.439
## 11 0.451 0.453
## 12 0.476 0.472
## 13 0.466 0.464
## 14 0.480 0.476
## 15 0.468 0.459
## 16 0.468 0.467
## 17 0.476 0.467
## 18 0.441 0.442
## 19 0.465 0.471
## 20 0.447 0.442
## 21 0.468 0.455
## 22 0.444 0.446
## 23 0.468 0.473
## 24 0.468 0.462
## 25 0.463 0.467
## 26 0.462 0.470
## 27 0.472 0.468
## 28 0.458 0.449
## 29 0.471 0.466
## 30 0.457 0.469
## 31 0.468 0.467
## 32 0.466 0.467
## 33 0.494 0.489
## 34 0.455 0.454
## 35 0.476 0.478
## 36 0.450 0.451
## 37 0.470 0.468
## 38 0.485 0.492
## 39 0.452 0.447
## 40 0.468 0.465
## 41 0.444 0.445
## 42 0.474 0.473
## 43 0.482 0.483
## 44 0.472 0.470
## 45 0.467 0.466
## 46 0.468 0.461
## 47 0.487 0.488
## 48 0.448 0.453
## 49 0.477 0.471
## 50 0.456 0.459
## 51 0.441 0.438
## 52 0.429 0.438
## 53 0.476 0.474
## 54 0.490 0.486
## 55 0.453 0.465
## 56 0.481 0.477
## 57 0.462 0.461
## 58 0.448 0.446
## 59 0.468 0.475
## 60 0.475 0.471
## 61 0.470 0.472
## 62 0.466 0.461
## 63 0.475 0.475
## 64 0.468 0.468
## 65 0.480 0.476
## 66 0.469 0.465
## 67 0.461 0.457
## 68 0.483 0.475
## 69 0.431 0.437
## 70 0.469 0.464
## 71 0.456 0.449
## 72 0.463 0.463
## 73 0.458 0.461
## 74 0.469 0.472
## 75 0.461 0.470
## 76 0.467 0.471
## 77 0.468 0.469
## 78 0.457 0.462
## 79 0.457 0.458
## 80 0.437 0.440
## 81 0.430 0.436
## 82 0.434 0.435
## 83 0.466 0.465
## 84 0.485 0.480
## 85 0.442 0.439
## 86 0.460 0.460
## 87 0.467 0.468
## 88 0.445 0.453
## 89 0.471 0.466
## 90 0.472 0.466
## 91 0.483 0.486
## 92 0.475 0.478
## 93 0.487 0.484
## 94 0.457 0.455
## 95 0.490 0.485
## 96 0.488 0.487
## 97 0.475 0.471
## 98 0.504 0.498
## 99 0.454 0.457
## 100 0.479 0.486
## 101 0.457 0.461
## 102 0.469 0.473
## 103 0.477 0.475
## 104 0.482 0.484
## 105 0.475 0.475
## 106 0.460 0.464
## 107 0.473 0.480
## 108 0.490 0.478
## 109 0.480 0.480
## 110 0.470 0.469
## 111 0.465 0.475
## 112 0.470 0.458
## 113 0.487 0.485
## 114 0.467 0.482
## 115 0.474 0.461
## 116 0.494 0.490
## 117 0.465 0.469
## 118 0.459 0.462
## 119 0.473 0.476
## 120 0.485 0.482