library(readr)
library(psych)
library(formattable)
library(tidyverse)
# Read the 2023-24 estimated player metrics and hold them as a tibble.
data <- tibble(read_csv("Players_Estimated_Metrics_Season23_24.csv"))
# First, restrict the data to players who appeared in at least 50 games and
# averaged at least 30 minutes per game this past season. This removes
# outliers who barely played and could therefore show misleadingly high stats.
eligible_players <- dplyr::filter(data, GP >= 50, MIN >= 30)
head(eligible_players)
## # A tibble: 6 × 32
##   PLAYER_ID PLAYER_NAME     GP     W     L W_PCT   MIN E_OFF_RATING E_DEF_RATING
##       <dbl> <chr>        <dbl> <dbl> <dbl> <dbl> <dbl>        <dbl>        <dbl>
## 1   1629673 Jordan Poole    78    15    63 0.192  30.1         108.         118.
## 2   1630166 Deni Avdija     75    14    61 0.187  30.1         109.         116.
## 3   1629027 Trae Young      54    22    32 0.407  36           116.         118.
## 4   1630169 Tyrese Hali…    69    40    29 0.58   32.2         120.         115 
## 5   1628398 Kyle Kuzma      70    13    57 0.186  32.6         109.         118.
## 6   1626156 D'Angelo Ru…    76    46    30 0.605  32.7         116.         113.
## # ℹ 23 more variables: E_NET_RATING <dbl>, E_AST_RATIO <dbl>, E_OREB_PCT <dbl>,
## #   E_DREB_PCT <dbl>, E_REB_PCT <dbl>, E_TOV_PCT <dbl>, E_USG_PCT <dbl>,
## #   E_PACE <dbl>, GP_RANK <dbl>, W_RANK <dbl>, L_RANK <dbl>, W_PCT_RANK <dbl>,
## #   MIN_RANK <dbl>, E_OFF_RATING_RANK <dbl>, E_DEF_RATING_RANK <dbl>,
## #   E_NET_RATING_RANK <dbl>, E_AST_RATIO_RANK <dbl>, E_OREB_PCT_RANK <dbl>,
## #   E_DREB_PCT_RANK <dbl>, E_REB_PCT_RANK <dbl>, E_TOV_PCT_RANK <dbl>,
## #   E_USG_PCT_RANK <dbl>, E_PACE_RANK <dbl>
# Next we look at the average offensive and defensive rating for the subset,
# as well as the leader in each respective rating.
mean_off_rating <- mean(eligible_players$E_OFF_RATING, na.rm = TRUE)
mean_def_rating <- mean(eligible_players$E_DEF_RATING, na.rm = TRUE)
# Find the row index of the leader for each rating.
# Offensive and net rating are higher-is-better metrics, so their leaders are
# the maxima. Defensive rating is lower-is-better, so its leader is the
# minimum. (Previously all three used which.min(), which reported the *worst*
# offensive and net-rating players as the "best".)
best_off_rating_index <- which.max(eligible_players$E_OFF_RATING)
best_off_rating <- eligible_players$PLAYER_NAME[best_off_rating_index]
best_def_rating_index <- which.min(eligible_players$E_DEF_RATING)
best_def_rating <- eligible_players$PLAYER_NAME[best_def_rating_index]
best_net_rating_index <- which.max(eligible_players$E_NET_RATING)
best_net_rating <- eligible_players$PLAYER_NAME[best_net_rating_index]
# Print the output
print(paste("The average offensive rating for players with at least 50 games played was", round(mean_off_rating,2), "and the mean defensive rating was", round(mean_def_rating,2), ". The player with the best offensive rating was", best_off_rating, "and the player with the best defensive rating was", best_def_rating, ". The player with the best net rating was", best_net_rating, "."))
# NOTE(review): the recorded output below was captured when the offensive and
# net-rating leaders were selected with which.min(); the two names shown for
# those ratings are the lowest-rated players. Re-run the script to refresh it.
## [1] "The average offensive rating for players with at least 50 games played was 114.4 and the mean defensive rating was 112.41 . The player with the best offensive rating was Jaren Jackson Jr. and the player with the best defensive rating was Rudy Gobert . The player with the best net rating was Miles Bridges ."
# Count the players remaining after the games-played / minutes filter and
# report that count.
no_of_players <- nrow(eligible_players)
print(
  paste(
    "There are",
    no_of_players,
    "that played at least 50 games this season and at least 30 minutes per game."
  )
)
## [1] "There are 87 that played at least 50 games this season and at least 30 minutes per game."
# Correlation between net rating and the W (wins) column across the eligible
# players. use = "complete.obs" drops rows with a missing value in either
# column so a single NA cannot turn the whole result into NA, matching the
# na.rm = TRUE handling used for the rating means earlier in the script.
cor_wins <- cor(
  eligible_players$E_NET_RATING,
  eligible_players$W,
  use = "complete.obs"
)
print(paste("There is a", cor_wins, "correlation between net rating and number of wins for a player. "))
## [1] "There is a 0.876401376077466 correlation between net rating and number of wins for a player. "
# Average net rating of the eligible players, ignoring missing values.
mean(eligible_players$E_NET_RATING, na.rm = TRUE)
## [1] 1.988506