# Set the working directory to the project folder that holds the data files.
# NOTE(review): this absolute path is machine-specific; setwd() fails on any
# machine where the folder does not exist.
setwd("C:/Users/LENOVO/Downloads/Regression Model/Project")  # Example path on Windows


# Reset the session before the analysis starts
rm(list = ls()) # Remove all (non-hidden) objects from the current environment
gc()            # Run garbage collection; prints the memory usage table below
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 521680 27.9    1160321   62   660385 35.3
## Vcells 948263  7.3    8388608   64  1769879 13.6
cat("\f")       # Clear the console (writes a form feed character)

# Close every open graphics device ("clear the charts").
# The previous `dev.off` (no parentheses) only printed the function's source
# and closed nothing. A bare `dev.off()` would instead stop with
# "cannot shut down device 1 (the null device)" when no device is open, so
# graphics.off() is used: it closes all open devices and does nothing when
# there are none.
graphics.off()
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Locations of the four downloaded CSV exports, in the order they are stacked
csv_paths <- c(
  "C:\\Users\\LENOVO\\Downloads\\Regression Model\\Project\\sportsref_download.csv",
  "C:\\Users\\LENOVO\\Downloads\\Regression Model\\Project\\sportsref_download (1).csv",
  "C:\\Users\\LENOVO\\Downloads\\Regression Model\\Project\\sportsref_download (2).csv",
  "C:\\Users\\LENOVO\\Downloads\\Regression Model\\Project\\sportsref_download (3).csv"
)

# Read each export into its own data frame and keep the individual tables
# available under their original names
csv_tables <- lapply(csv_paths, read.csv)
df  <- csv_tables[[1]]
df1 <- csv_tables[[2]]
df2 <- csv_tables[[3]]
df3 <- csv_tables[[4]]

# Stack the four tables row-wise into a single data frame
merged_df <- bind_rows(csv_tables)

# Save the combined table as CSV without a row-name column; the relative
# path resolves against the current working directory
write.csv(merged_df, file = "merged_dataset.csv", row.names = FALSE)
# Preview the first 10 rows of the merged table
head(merged_df, n = 10)
##    Rk                    Team  G    MP   FG  FGA   FG.  X3P X3PA  X3P.  X2P
## 1   1        Milwaukee Bucks* 73 241.0 43.3 90.9 0.476 13.8 38.9 0.355 29.5
## 2   2        Houston Rockets* 72 241.4 40.8 90.4 0.451 15.6 45.3 0.345 25.1
## 3   3       Dallas Mavericks* 75 242.3 41.7 90.3 0.461 15.1 41.3 0.367 26.5
## 4   4   Los Angeles Clippers* 72 241.4 41.6 89.2 0.466 12.4 33.5 0.371 29.1
## 5   5    New Orleans Pelicans 72 242.1 42.6 91.6 0.465 13.6 36.9 0.370 28.9
## 6   6 Portland Trail Blazers* 74 241.0 42.2 91.2 0.463 12.9 34.1 0.377 29.3
## 7   7      Washington Wizards 72 241.0 41.5 90.9 0.457 12.0 32.6 0.368 29.5
## 8   8       San Antonio Spurs 71 242.5 42.2 89.4 0.472 10.7 28.5 0.376 31.5
## 9   9         Boston Celtics* 72 242.1 41.3 89.6 0.461 12.6 34.5 0.364 28.7
## 10 10            Phoenix Suns 73 241.0 41.2 88.1 0.468 11.4 31.8 0.358 29.8
##    X2PA  X2P.   FT  FTA   FT.  ORB  DRB  TRB  AST STL BLK  TOV   PF   PTS
## 1  52.0 0.567 18.3 24.7 0.742  9.5 42.2 51.7 25.9 7.2 5.9 15.1 19.6 118.7
## 2  45.2 0.557 20.6 26.1 0.791  9.8 34.5 44.3 21.6 8.7 5.2 14.7 21.8 117.8
## 3  49.0 0.541 18.6 23.8 0.779 10.5 36.4 46.9 24.7 6.1 4.8 12.7 19.5 117.0
## 4  55.8 0.522 20.8 26.3 0.791 10.7 37.0 47.7 23.7 7.1 4.7 14.6 22.1 116.3
## 5  54.8 0.528 17.1 23.4 0.729 11.1 35.4 46.5 26.8 7.5 5.0 16.4 21.2 115.8
## 6  57.1 0.514 17.7 22.1 0.804 10.2 35.1 45.3 20.6 6.3 6.1 12.8 21.7 115.0
## 7  58.3 0.506 19.4 24.6 0.788 10.2 31.9 42.0 25.0 8.0 4.3 14.2 22.7 114.4
## 8  61.0 0.516 19.0 23.4 0.810  9.0 35.6 44.6 24.7 7.3 5.5 12.6 19.4 114.1
## 9  55.0 0.522 18.6 23.2 0.801 10.7 35.4 46.1 23.0 8.3 5.6 13.8 21.6 113.7
## 10 56.3 0.529 19.9 23.8 0.834  9.8 33.8 43.5 27.2 7.7 4.0 14.8 22.0 113.6
# Compact structure listing: row/column counts, column types, leading values
str(object = merged_df)
## 'data.frame':    120 obs. of  25 variables:
##  $ Rk  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Team: chr  "Milwaukee Bucks*" "Houston Rockets*" "Dallas Mavericks*" "Los Angeles Clippers*" ...
##  $ G   : int  73 72 75 72 72 74 72 71 72 73 ...
##  $ MP  : num  241 241 242 241 242 ...
##  $ FG  : num  43.3 40.8 41.7 41.6 42.6 42.2 41.5 42.2 41.3 41.2 ...
##  $ FGA : num  90.9 90.4 90.3 89.2 91.6 91.2 90.9 89.4 89.6 88.1 ...
##  $ FG. : num  0.476 0.451 0.461 0.466 0.465 0.463 0.457 0.472 0.461 0.468 ...
##  $ X3P : num  13.8 15.6 15.1 12.4 13.6 12.9 12 10.7 12.6 11.4 ...
##  $ X3PA: num  38.9 45.3 41.3 33.5 36.9 34.1 32.6 28.5 34.5 31.8 ...
##  $ X3P.: num  0.355 0.345 0.367 0.371 0.37 0.377 0.368 0.376 0.364 0.358 ...
##  $ X2P : num  29.5 25.1 26.5 29.1 28.9 29.3 29.5 31.5 28.7 29.8 ...
##  $ X2PA: num  52 45.2 49 55.8 54.8 57.1 58.3 61 55 56.3 ...
##  $ X2P.: num  0.567 0.557 0.541 0.522 0.528 0.514 0.506 0.516 0.522 0.529 ...
##  $ FT  : num  18.3 20.6 18.6 20.8 17.1 17.7 19.4 19 18.6 19.9 ...
##  $ FTA : num  24.7 26.1 23.8 26.3 23.4 22.1 24.6 23.4 23.2 23.8 ...
##  $ FT. : num  0.742 0.791 0.779 0.791 0.729 0.804 0.788 0.81 0.801 0.834 ...
##  $ ORB : num  9.5 9.8 10.5 10.7 11.1 10.2 10.2 9 10.7 9.8 ...
##  $ DRB : num  42.2 34.5 36.4 37 35.4 35.1 31.9 35.6 35.4 33.8 ...
##  $ TRB : num  51.7 44.3 46.9 47.7 46.5 45.3 42 44.6 46.1 43.5 ...
##  $ AST : num  25.9 21.6 24.7 23.7 26.8 20.6 25 24.7 23 27.2 ...
##  $ STL : num  7.2 8.7 6.1 7.1 7.5 6.3 8 7.3 8.3 7.7 ...
##  $ BLK : num  5.9 5.2 4.8 4.7 5 6.1 4.3 5.5 5.6 4 ...
##  $ TOV : num  15.1 14.7 12.7 14.6 16.4 12.8 14.2 12.6 13.8 14.8 ...
##  $ PF  : num  19.6 21.8 19.5 22.1 21.2 21.7 22.7 19.4 21.6 22 ...
##  $ PTS : num  119 118 117 116 116 ...
# Number of missing (NA) entries in each column
colSums(x = is.na(merged_df))
##   Rk Team    G   MP   FG  FGA  FG.  X3P X3PA X3P.  X2P X2PA X2P.   FT  FTA  FT. 
##    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0 
##  ORB  DRB  TRB  AST  STL  BLK  TOV   PF  PTS 
##    0    0    0    0    0    0    0    0    0
# Class of every column, returned as a vector named by column
sapply(X = merged_df, FUN = class)
##          Rk        Team           G          MP          FG         FGA 
##   "integer" "character"   "integer"   "numeric"   "numeric"   "numeric" 
##         FG.         X3P        X3PA        X3P.         X2P        X2PA 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##        X2P.          FT         FTA         FT.         ORB         DRB 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##         TRB         AST         STL         BLK         TOV          PF 
##   "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##         PTS 
##   "numeric"
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Descriptive statistics table (mean, min, max, sd, median) printed as text
stargazer(
  merged_df,
  type = "text",
  summary.stat = c("mean", "min", "max", "sd", "median")
)
## 
## ==================================================
## Statistic  Mean     Min     Max   St. Dev. Median 
## --------------------------------------------------
## Rk        15.500     1      30     8.692    15.5  
## G         76.650    64      82     5.640    78.5  
## MP        241.583 240.000 243.700  0.810   241.500
## FG        41.163  37.300  44.700   1.563   41.300 
## FGA       88.407  83.700  94.400   2.234   88.400 
## FG.        0.466   0.429   0.504   0.015    0.468 
## X3P       12.420   9.600  16.700   1.495   12.200 
## X3PA      34.532  28.000  45.300   3.609   34.200 
## X3P.       0.359   0.323   0.411   0.016    0.358 
## X2P       28.747  24.500  33.900   2.107   28.700 
## X2PA      53.873  43.300  62.100   4.066   53.950 
## X2P.       0.534   0.476   0.586   0.021    0.532 
## FT        17.536  13.800  21.000   1.455   17.500 
## FTA       22.575  17.500  26.600   1.816   22.400 
## FT.        0.777   0.694   0.839   0.028    0.779 
## ORB       10.172   7.600  14.100   1.127   10.150 
## DRB       34.077  30.300  42.200   1.708   34.050 
## TRB       44.242  38.800  51.700   1.982   44.200 
## AST       24.788  20.600  29.800   1.774   24.700 
## STL        7.538   6.100  10.000   0.790    7.450 
## BLK        4.787   3.000   6.600   0.683    4.750 
## TOV       14.063  11.100  16.500   1.095   14.150 
## PF        19.922  17.200  23.100   1.272   19.900 
## PTS       112.266 102.900 120.700  3.853   112.850
## --------------------------------------------------
library(visdat)
## Warning: package 'visdat' was built under R version 4.3.2
# Draw the visdat overview plot of the merged table
vis_dat(x = merged_df)

# List the column names available for modelling
colnames(merged_df)
##  [1] "Rk"   "Team" "G"    "MP"   "FG"   "FGA"  "FG."  "X3P"  "X3PA" "X3P."
## [11] "X2P"  "X2PA" "X2P." "FT"   "FTA"  "FT."  "ORB"  "DRB"  "TRB"  "AST" 
## [21] "STL"  "BLK"  "TOV"  "PF"   "PTS"
# Simple linear regression: PTS regressed on FG alone
model <- lm(formula = PTS ~ FG, data = merged_df)

# Print the residual summary, coefficient table and fit statistics
summary(object = model)
## 
## Call:
## lm(formula = PTS ~ FG, data = merged_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8322 -1.4754 -0.2299  1.3163  6.2945 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  25.9297     4.9091   5.282 5.92e-07 ***
## FG            2.0974     0.1192  17.599  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.032 on 118 degrees of freedom
## Multiple R-squared:  0.7241, Adjusted R-squared:  0.7218 
## F-statistic: 309.7 on 1 and 118 DF,  p-value: < 2.2e-16
# Arrange the plotting area as a 2 x 2 grid, then draw the four default
# lm diagnostic plots for the single-predictor model
par(mfrow = c(2, 2))
plot(model, which = c(1, 2, 3, 5))

# Multiple linear regression: PTS regressed on FG, AST and ORB together
model1 <- lm(formula = PTS ~ FG + AST + ORB, data = merged_df)

# Print the residual summary, coefficient table and fit statistics
summary(object = model1)
## 
## Call:
## lm(formula = PTS ~ FG + AST + ORB, data = merged_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1766 -1.2938 -0.1248  1.3321  5.5435 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  27.2515     4.9538   5.501 2.28e-07 ***
## FG            2.2706     0.1473  15.412  < 2e-16 ***
## AST          -0.2213     0.1289  -1.717   0.0886 .  
## ORB          -0.2911     0.1671  -1.742   0.0841 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.006 on 116 degrees of freedom
## Multiple R-squared:  0.7358, Adjusted R-squared:  0.7289 
## F-statistic: 107.7 on 3 and 116 DF,  p-value: < 2.2e-16
# Re-apply the 2 x 2 layout and draw the four default lm diagnostic plots
# for the three-predictor model
par(mfrow = c(2, 2))
plot(model1, which = c(1, 2, 3, 5))