# Point the session at the project folder (example path on Windows)
project_dir <- "C:/Users/LENOVO/Downloads/Regression Model/Project"
setwd(dir = project_dir)
# Empty the workspace; this also discards the temporary `project_dir`
rm(list = ls())
# Trigger garbage collection and report memory usage
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 521680 27.9 1160321 62 660385 35.3
## Vcells 948263 7.3 8388608 64 1769879 13.6
cat("\f") # Clear the console
# Clear the charts: close every open graphics device.
# The bare name `dev.off` (no parentheses) only printed the function's source
# and closed nothing. graphics.off() closes all devices and, unlike dev.off(),
# does not raise an error when no device is currently open.
graphics.off()
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Locations of the four exported tables, keyed by the variable each one fills
csv_paths <- c(
  df = "C:\\Users\\LENOVO\\Downloads\\Regression Model\\Project\\sportsref_download.csv",
  df1 = "C:\\Users\\LENOVO\\Downloads\\Regression Model\\Project\\sportsref_download (1).csv",
  df2 = "C:\\Users\\LENOVO\\Downloads\\Regression Model\\Project\\sportsref_download (2).csv",
  df3 = "C:\\Users\\LENOVO\\Downloads\\Regression Model\\Project\\sportsref_download (3).csv"
)
# Read every file with the default read.csv() settings
loaded_tables <- lapply(csv_paths, read.csv)
df <- loaded_tables[["df"]]
df1 <- loaded_tables[["df1"]]
df2 <- loaded_tables[["df2"]]
df3 <- loaded_tables[["df3"]]
# Drop the temporaries so only df, df1, df2 and df3 remain in the workspace
rm(csv_paths, loaded_tables)
# Stack the four tables row-wise into a single data frame
merged_df <- bind_rows(df, df1, df2, df3)
# Save the combined table (without row names) in the working directory
write.csv(merged_df, "merged_dataset.csv", row.names = FALSE)
# Preview the first ten rows of the merged table
head(merged_df, n = 10L)
## Rk Team G MP FG FGA FG. X3P X3PA X3P. X2P
## 1 1 Milwaukee Bucks* 73 241.0 43.3 90.9 0.476 13.8 38.9 0.355 29.5
## 2 2 Houston Rockets* 72 241.4 40.8 90.4 0.451 15.6 45.3 0.345 25.1
## 3 3 Dallas Mavericks* 75 242.3 41.7 90.3 0.461 15.1 41.3 0.367 26.5
## 4 4 Los Angeles Clippers* 72 241.4 41.6 89.2 0.466 12.4 33.5 0.371 29.1
## 5 5 New Orleans Pelicans 72 242.1 42.6 91.6 0.465 13.6 36.9 0.370 28.9
## 6 6 Portland Trail Blazers* 74 241.0 42.2 91.2 0.463 12.9 34.1 0.377 29.3
## 7 7 Washington Wizards 72 241.0 41.5 90.9 0.457 12.0 32.6 0.368 29.5
## 8 8 San Antonio Spurs 71 242.5 42.2 89.4 0.472 10.7 28.5 0.376 31.5
## 9 9 Boston Celtics* 72 242.1 41.3 89.6 0.461 12.6 34.5 0.364 28.7
## 10 10 Phoenix Suns 73 241.0 41.2 88.1 0.468 11.4 31.8 0.358 29.8
## X2PA X2P. FT FTA FT. ORB DRB TRB AST STL BLK TOV PF PTS
## 1 52.0 0.567 18.3 24.7 0.742 9.5 42.2 51.7 25.9 7.2 5.9 15.1 19.6 118.7
## 2 45.2 0.557 20.6 26.1 0.791 9.8 34.5 44.3 21.6 8.7 5.2 14.7 21.8 117.8
## 3 49.0 0.541 18.6 23.8 0.779 10.5 36.4 46.9 24.7 6.1 4.8 12.7 19.5 117.0
## 4 55.8 0.522 20.8 26.3 0.791 10.7 37.0 47.7 23.7 7.1 4.7 14.6 22.1 116.3
## 5 54.8 0.528 17.1 23.4 0.729 11.1 35.4 46.5 26.8 7.5 5.0 16.4 21.2 115.8
## 6 57.1 0.514 17.7 22.1 0.804 10.2 35.1 45.3 20.6 6.3 6.1 12.8 21.7 115.0
## 7 58.3 0.506 19.4 24.6 0.788 10.2 31.9 42.0 25.0 8.0 4.3 14.2 22.7 114.4
## 8 61.0 0.516 19.0 23.4 0.810 9.0 35.6 44.6 24.7 7.3 5.5 12.6 19.4 114.1
## 9 55.0 0.522 18.6 23.2 0.801 10.7 35.4 46.1 23.0 8.3 5.6 13.8 21.6 113.7
## 10 56.3 0.529 19.9 23.8 0.834 9.8 33.8 43.5 27.2 7.7 4.0 14.8 22.0 113.6
# Compact overview: dimensions, column types and leading values
str(object = merged_df)
## 'data.frame': 120 obs. of 25 variables:
## $ Rk : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Team: chr "Milwaukee Bucks*" "Houston Rockets*" "Dallas Mavericks*" "Los Angeles Clippers*" ...
## $ G : int 73 72 75 72 72 74 72 71 72 73 ...
## $ MP : num 241 241 242 241 242 ...
## $ FG : num 43.3 40.8 41.7 41.6 42.6 42.2 41.5 42.2 41.3 41.2 ...
## $ FGA : num 90.9 90.4 90.3 89.2 91.6 91.2 90.9 89.4 89.6 88.1 ...
## $ FG. : num 0.476 0.451 0.461 0.466 0.465 0.463 0.457 0.472 0.461 0.468 ...
## $ X3P : num 13.8 15.6 15.1 12.4 13.6 12.9 12 10.7 12.6 11.4 ...
## $ X3PA: num 38.9 45.3 41.3 33.5 36.9 34.1 32.6 28.5 34.5 31.8 ...
## $ X3P.: num 0.355 0.345 0.367 0.371 0.37 0.377 0.368 0.376 0.364 0.358 ...
## $ X2P : num 29.5 25.1 26.5 29.1 28.9 29.3 29.5 31.5 28.7 29.8 ...
## $ X2PA: num 52 45.2 49 55.8 54.8 57.1 58.3 61 55 56.3 ...
## $ X2P.: num 0.567 0.557 0.541 0.522 0.528 0.514 0.506 0.516 0.522 0.529 ...
## $ FT : num 18.3 20.6 18.6 20.8 17.1 17.7 19.4 19 18.6 19.9 ...
## $ FTA : num 24.7 26.1 23.8 26.3 23.4 22.1 24.6 23.4 23.2 23.8 ...
## $ FT. : num 0.742 0.791 0.779 0.791 0.729 0.804 0.788 0.81 0.801 0.834 ...
## $ ORB : num 9.5 9.8 10.5 10.7 11.1 10.2 10.2 9 10.7 9.8 ...
## $ DRB : num 42.2 34.5 36.4 37 35.4 35.1 31.9 35.6 35.4 33.8 ...
## $ TRB : num 51.7 44.3 46.9 47.7 46.5 45.3 42 44.6 46.1 43.5 ...
## $ AST : num 25.9 21.6 24.7 23.7 26.8 20.6 25 24.7 23 27.2 ...
## $ STL : num 7.2 8.7 6.1 7.1 7.5 6.3 8 7.3 8.3 7.7 ...
## $ BLK : num 5.9 5.2 4.8 4.7 5 6.1 4.3 5.5 5.6 4 ...
## $ TOV : num 15.1 14.7 12.7 14.6 16.4 12.8 14.2 12.6 13.8 14.8 ...
## $ PF : num 19.6 21.8 19.5 22.1 21.2 21.7 22.7 19.4 21.6 22 ...
## $ PTS : num 119 118 117 116 116 ...
# Number of missing values in each column
colSums(x = is.na(merged_df))
## Rk Team G MP FG FGA FG. X3P X3PA X3P. X2P X2PA X2P. FT FTA FT.
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## ORB DRB TRB AST STL BLK TOV PF PTS
## 0 0 0 0 0 0 0 0 0
# Primary class of each column, as a named character vector.
# vapply() guarantees a character result; sapply() would silently switch to a
# list as soon as any column carried more than one class (e.g. POSIXct).
vapply(merged_df, function(column) class(column)[1L], character(1))
## Rk Team G MP FG FGA
## "integer" "character" "integer" "numeric" "numeric" "numeric"
## FG. X3P X3PA X3P. X2P X2PA
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## X2P. FT FTA FT. ORB DRB
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## TRB AST STL BLK TOV PF
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## PTS
## "numeric"
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Text table of descriptive statistics: mean, min, max, standard deviation
# and median
stargazer(
  merged_df,
  type = "text",
  summary.stat = c("mean", "min", "max", "sd", "median")
)
##
## ==================================================
## Statistic Mean Min Max St. Dev. Median
## --------------------------------------------------
## Rk 15.500 1 30 8.692 15.5
## G 76.650 64 82 5.640 78.5
## MP 241.583 240.000 243.700 0.810 241.500
## FG 41.163 37.300 44.700 1.563 41.300
## FGA 88.407 83.700 94.400 2.234 88.400
## FG. 0.466 0.429 0.504 0.015 0.468
## X3P 12.420 9.600 16.700 1.495 12.200
## X3PA 34.532 28.000 45.300 3.609 34.200
## X3P. 0.359 0.323 0.411 0.016 0.358
## X2P 28.747 24.500 33.900 2.107 28.700
## X2PA 53.873 43.300 62.100 4.066 53.950
## X2P. 0.534 0.476 0.586 0.021 0.532
## FT 17.536 13.800 21.000 1.455 17.500
## FTA 22.575 17.500 26.600 1.816 22.400
## FT. 0.777 0.694 0.839 0.028 0.779
## ORB 10.172 7.600 14.100 1.127 10.150
## DRB 34.077 30.300 42.200 1.708 34.050
## TRB 44.242 38.800 51.700 1.982 44.200
## AST 24.788 20.600 29.800 1.774 24.700
## STL 7.538 6.100 10.000 0.790 7.450
## BLK 4.787 3.000 6.600 0.683 4.750
## TOV 14.063 11.100 16.500 1.095 14.150
## PF 19.922 17.200 23.100 1.272 19.900
## PTS 112.266 102.900 120.700 3.853 112.850
## --------------------------------------------------
library(visdat)
## Warning: package 'visdat' was built under R version 4.3.2
# Visual overview of the merged table produced by visdat
vis_dat(x = merged_df)

# List the column names available for modelling
colnames(merged_df)
## [1] "Rk" "Team" "G" "MP" "FG" "FGA" "FG." "X3P" "X3PA" "X3P."
## [11] "X2P" "X2PA" "X2P." "FT" "FTA" "FT." "ORB" "DRB" "TRB" "AST"
## [21] "STL" "BLK" "TOV" "PF" "PTS"
# Simple linear regression of PTS on FG
model <- lm(formula = PTS ~ FG, data = merged_df)
# Show coefficients, residual summary and fit statistics
print(summary(model))
##
## Call:
## lm(formula = PTS ~ FG, data = merged_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8322 -1.4754 -0.2299 1.3163 6.2945
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.9297 4.9091 5.282 5.92e-07 ***
## FG 2.0974 0.1192 17.599 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.032 on 118 degrees of freedom
## Multiple R-squared: 0.7241, Adjusted R-squared: 0.7218
## F-statistic: 309.7 on 1 and 118 DF, p-value: < 2.2e-16
# Plot diagnostic plots for the model on a 2 x 2 grid.
# par() returns the previous settings; they are restored afterwards so the
# grid layout does not leak into later, unrelated plots on the same device.
old_par <- par(mfrow = c(2, 2))
plot(model)
par(old_par)

# Linear regression of PTS on three predictors: FG, AST and ORB
model1 <- lm(formula = PTS ~ FG + AST + ORB, data = merged_df)
# Show coefficients, residual summary and fit statistics
print(summary(model1))
##
## Call:
## lm(formula = PTS ~ FG + AST + ORB, data = merged_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1766 -1.2938 -0.1248 1.3321 5.5435
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.2515 4.9538 5.501 2.28e-07 ***
## FG 2.2706 0.1473 15.412 < 2e-16 ***
## AST -0.2213 0.1289 -1.717 0.0886 .
## ORB -0.2911 0.1671 -1.742 0.0841 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.006 on 116 degrees of freedom
## Multiple R-squared: 0.7358, Adjusted R-squared: 0.7289
## F-statistic: 107.7 on 3 and 116 DF, p-value: < 2.2e-16
# Diagnostic plots for model1 on a 2 x 2 grid.
# par() returns the previous settings; they are restored afterwards so the
# grid layout does not leak into later, unrelated plots on the same device.
old_par <- par(mfrow = c(2, 2))
plot(model1)
par(old_par)
