library(readr)
library(psych)
library(formattable)
library(tidyverse)
# Load the 2023-24 estimated player metrics. The file name is a relative path,
# so it is resolved against the current working directory. The parsed result
# is wrapped in tibble() before being stored.
data <- tibble(read_csv("Players_Estimated_Metrics_Season23_24.csv"))
# First, restrict the data to players who appeared in at least 50 games and
# averaged at least 30 minutes per game this past season. This removes
# outliers who barely played and could otherwise show inflated stats.
# which() keeps only rows where the condition is TRUE, so rows where the
# comparison evaluates to NA are dropped as well.
eligible_players <- data[
  which(with(data, GP >= 50 & MIN >= 30)), ,
  drop = FALSE
]
# Preview the first rows of the filtered table.
head(eligible_players)
## # A tibble: 6 × 32
## PLAYER_ID PLAYER_NAME GP W L W_PCT MIN E_OFF_RATING E_DEF_RATING
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1629673 Jordan Poole 78 15 63 0.192 30.1 108. 118.
## 2 1630166 Deni Avdija 75 14 61 0.187 30.1 109. 116.
## 3 1629027 Trae Young 54 22 32 0.407 36 116. 118.
## 4 1630169 Tyrese Hali… 69 40 29 0.58 32.2 120. 115
## 5 1628398 Kyle Kuzma 70 13 57 0.186 32.6 109. 118.
## 6 1626156 D'Angelo Ru… 76 46 30 0.605 32.7 116. 113.
## # ℹ 23 more variables: E_NET_RATING <dbl>, E_AST_RATIO <dbl>, E_OREB_PCT <dbl>,
## # E_DREB_PCT <dbl>, E_REB_PCT <dbl>, E_TOV_PCT <dbl>, E_USG_PCT <dbl>,
## # E_PACE <dbl>, GP_RANK <dbl>, W_RANK <dbl>, L_RANK <dbl>, W_PCT_RANK <dbl>,
## # MIN_RANK <dbl>, E_OFF_RATING_RANK <dbl>, E_DEF_RATING_RANK <dbl>,
## # E_NET_RATING_RANK <dbl>, E_AST_RATIO_RANK <dbl>, E_OREB_PCT_RANK <dbl>,
## # E_DREB_PCT_RANK <dbl>, E_REB_PCT_RANK <dbl>, E_TOV_PCT_RANK <dbl>,
## # E_USG_PCT_RANK <dbl>, E_PACE_RANK <dbl>
# Next we look at the average offensive and defensive rating for the subset,
# as well as the leader in each respective rating.
mean_off_rating <- mean(eligible_players$E_OFF_RATING, na.rm = TRUE)
mean_def_rating <- mean(eligible_players$E_DEF_RATING, na.rm = TRUE)

# Find the row index of the leader for each rating.
# Offensive rating and net rating are "higher is better", so the leader is the
# maximum. Defensive rating is "lower is better", so the leader is the minimum.
best_off_rating_index <- which.max(eligible_players$E_OFF_RATING)
best_off_rating <- eligible_players$PLAYER_NAME[best_off_rating_index]
best_def_rating_index <- which.min(eligible_players$E_DEF_RATING)
best_def_rating <- eligible_players$PLAYER_NAME[best_def_rating_index]
best_net_rating_index <- which.max(eligible_players$E_NET_RATING)
best_net_rating <- eligible_players$PLAYER_NAME[best_net_rating_index]

# Print the output
print(paste(
  "The average offensive rating for players with at least 50 games played was",
  round(mean_off_rating, 2),
  "and the mean defensive rating was",
  round(mean_def_rating, 2),
  ". The player with the best offensive rating was",
  best_off_rating,
  "and the player with the best defensive rating was",
  best_def_rating,
  ". The player with the best net rating was",
  best_net_rating,
  "."
))
## NOTE: re-run this chunk to capture fresh output. The previously recorded
## output (averages 114.4 offensive / 112.41 defensive, Rudy Gobert as the best
## defender) named Jaren Jackson Jr. and Miles Bridges as the offensive and
## net-rating leaders, but those were the *lowest* values in the subset because
## which.min() had been used for all three ratings.
# Count how many players met both the games-played and minutes cutoffs.
no_of_players <- nrow(eligible_players)
# The sentence now names what is being counted ("players").
print(paste(
  "There are",
  no_of_players,
  "players that played at least 50 games this season and at least 30 minutes per game."
))
## [1] "There are 87 players that played at least 50 games this season and at least 30 minutes per game."
# Correlation between net rating and the W (wins) column across the eligible
# players. Rows with a missing value in either column are ignored, matching
# the na.rm = TRUE handling used for the rating averages earlier in the script.
cor_wins <- cor(
  eligible_players$E_NET_RATING,
  eligible_players$W,
  use = "complete.obs"
)
print(paste("There is a", cor_wins, "correlation between net rating and number of wins for a player. "))
## [1] "There is a 0.876401376077466 correlation between net rating and number of wins for a player. "
# Average net rating of the eligible players, skipping missing values.
mean(eligible_players$E_NET_RATING, na.rm = TRUE)
## [1] 1.988506