This is an R HTML document. When you click the Knit HTML button a web page will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# NBA home advantage and offensive/defensive analysis of the 2018-19 season
# using Bayesian hierarchical modeling.

# Get data.
path <- 'Desktop/nba_database/Regular+Season/team_stats_nba.com/game_logs/boxscore/2018-19.csv'
data <- read.csv(path)

# Get rid of duplicate games: rows whose MATCHUP has '@' as its 5th character
# ("AAA @ BBB") are dropped, so each game is kept once via its other row.
# A logical mask is used instead of negative indexing because
# data[-integer(0), ] would drop every row when no '@' row is found; %in%
# also treats a missing MATCHUP as "not an away row" instead of yielding NA.
is_away_row <- substr(data[, "MATCHUP"], 5, 5) %in% '@'
data <- data[!is_away_row, ]

# Create the data frame that stores the relevant data. We only use home team,
# away team, home points and away points (plus the home team's id).
# Appropriate to note: some of these games went to overtime and this is not
# accounted for in this analysis.
# Columns are built as whole vectors so that each keeps its natural type;
# filling rows with c(...) would coerce everything to character.
df <- data.frame(
  home_team = as.character(data[, 'TEAM_ABBREVIATION']),
  # Opponent abbreviation: characters 9-12 of "AAA vs. BBB".
  away_team = substr(data[, "MATCHUP"], 9, 12),
  home_points = data[, 'PTS'],
  # PLUS_MINUS is the home team's margin, so the opponent scored PTS - margin.
  away_points = data[, 'PTS'] - data[, 'PLUS_MINUS'],
  team_id = as.character(data[, 'TEAM_ID']),
  stringsAsFactors = FALSE
)

###############################
###############################
###############################

# Create a mixed effects model in Stan to sample from.
library(rstan)
options(mc.cores = parallel::detectCores()) rstan_options(auto_write = TRUE) #stan_model = " #data { #int<lower=0> n_teams; //number of teams #int<lower=0> n_games; //number of games #int<lower=0> home_team[n_games]; //home team index #int<lower=0> away_team[n_games]; //away team index #int<lower=0> home_pts[n_games]; //home team points #int<lower=0> away_pts[n_games]; //away team points #} #parameters { #//hyper parameteres #real mu_off; //hyper prior for mean of offensive ability of team #real<lower=0> tau_off; //hyper prior for sd of offensive ability of team #real mu_def; //hyper prior for mean of defensive ability of team #real<lower=0> tau_def; //hyper prior for sd of defensive ability of team #//parameters #real home; //home team advatage in pts #vector[n_teams] off; //offensive ability of each team #vector[n_teams] def; //defensive ability of each team #} #model { #//hyperpriors, model should not be very sensitive to these #mu_off ~ normal(0,.5); #tau_off ~ normal(0,2); #mu_def ~ normal(0,.5); #tau_def ~ normal(0,2); #//priors #off~normal(mu_off,tau_off); #def~normal(mu_def,tau_def); #home~normal(3,5); //they say 3pts for home court is what vegas prices in #//likelihood, poisson for counts, 111.2 is the average pts a team scores #home_pts~poisson(111+home+off[home_team]-def[away_team]); #away_pts~poisson(111+off[away_team]-def[home_team]); #} #" stan_model = [1256 chars quoted with '"'] #fit the stan model model = stan_model(model_code=stan_model)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.2/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.2/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.2/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.2/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.2/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.2/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.2/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.2/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DBOOST_NO_AUTO_PTR -include '/Library/Frameworks/R.framework/Versions/4.2/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/usr/local/include -fPIC -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.2/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.2/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.2/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.2/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.2/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.2/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.2/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.2/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
# Prepare the inputs to be fed into the Stan model.
teams <- unique(df$home_team)
nteams <- length(teams)
ngames <- nrow(df)

# Map each game's home/away abbreviation to its 1-based position in `teams`.
# match() always returns exactly one entry per game (NA for an unknown team),
# whereas unlist(sapply(..., which(...))) silently drops unmatched games and
# leaves the index vectors shorter than n_games.
home_team <- match(df$home_team, teams)
away_team <- match(df$away_team, teams)
stopifnot(!anyNA(home_team), !anyNA(away_team))

home_pts <- as.numeric(df$home_points)
away_pts <- as.numeric(df$away_points)

# Fit the model to the data. The character vector `teams` is deliberately not
# passed: Stan data must be numeric, and the index vectors already encode it.
r <- sampling(
  model,
  data = list(
    n_teams = nteams,
    n_games = ngames,
    home_team = home_team,
    away_team = away_team,
    home_pts = home_pts,
    away_pts = away_pts
  )
)

# Extract the posterior draws; qualified so that another attached package's
# extract() cannot mask rstan's.
params <- rstan::extract(r)

# Posterior distribution of the home parameter and its posterior mean.
hist(params$home, main = 'home court advantage in 2018-19')
mean(params$home)
## [1] 2.643348
# Take the posterior means as best estimates of the parameters. We also have
# estimates for the sd of these estimates but will not visualize them here.
off <- colMeans(params$off)
def <- colMeans(params$def)

# Furthest round each team reached in that postseason, keyed by abbreviation
# so the coding no longer depends on the order of `teams`:
# 0 = no playoffs, 1 = 1st round exit, 2 = 2nd round exit,
# 3 = conference finals, 4 = finals.
# ORL is 1: Orlando made the 2019 playoffs and lost in the first round.
playoff_round <- c(
  MEM = 0, CHA = 0, DEN = 2, ATL = 0, MIL = 3, POR = 3,
  LAC = 1, BKN = 1, NYK = 0, SAS = 1, PHI = 2, NOP = 0,
  OKC = 1, WAS = 0, MIN = 0, DAL = 0, UTA = 1, DET = 1,
  MIA = 0, CLE = 0, CHI = 0, LAL = 0, TOR = 4, SAC = 0,
  IND = 1, HOU = 2, BOS = 2, GSW = 4, ORL = 1, PHX = 0
)

# off[i] / def[i] belong to teams[i] (the index used to build the Stan data),
# so label rows from `teams` rather than from a hand-typed vector.
stopifnot(
  length(off) == length(teams),
  length(def) == length(teams),
  all(teams %in% names(playoff_round))
)

# Data frame used to visualize the off and def parameter estimates per team.
results <- data.frame(
  def = def,
  off = off,
  TEAM_NAME = teams,
  playoff_round_reached = factor(unname(playoff_round[teams]), levels = 0:4),
  team_id = df$team_id[match(teams, df$home_team)],
  stringsAsFactors = FALSE
)

# Plot results for off and def; point colour encodes the playoff round via the
# factor's integer codes in the default palette.
plot(results$off, results$def, col = results$playoff_round_reached, pch = 19,
     main = 'black~no playoffs, red~1st round exit, green~2nd round exit, blue ~ conf finals, light blue~finals')
text(results$off, results$def, labels = results$TEAM_NAME, pos = 3)
# Comparison with traditional NBA stats: offensive and defensive rating via
# nba.com.
path <- 'Desktop/nba_database/Regular+Season/team_stats_nba.com/general/advanced/2018-19.csv'
data <- read.csv(path)

# Keep only the rating columns and the team identifiers.
keep_cols <- c('OFF_RATING', 'DEF_RATING', 'TEAM_NAME', 'TEAM_ID')
data <- data[, keep_cols]

# Overwrite the team names with abbreviations, assigned by row position.
# NOTE(review): this assumes the CSV rows are in exactly this order - confirm.
data$TEAM_NAME <- c(
  'ATL', 'BOS', 'BKN', 'CHA', 'CHI', 'CLE', 'DAL', 'DEN', 'DET', 'GSW',
  'HOU', 'IND', 'LAC', 'LAL', 'MEM', 'MIA', 'MIL', 'MIN', 'NOP', 'NYK',
  'OKC', 'ORL', 'PHI', 'PHX', 'POR', 'SAC', 'SAS', 'TOR', 'UTA', 'WAS'
)

# One data frame for plotting and regression ease.
new <- merge(data, results, by = 'TEAM_NAME')

# Official offensive rating (x) against the model's offensive parameter (y).
plot(new$OFF_RATING, new$off, col = new$playoff_round_reached, pch = 19)
text(new$OFF_RATING, new$off, labels = new$TEAM_NAME, pos = 3)

# Linear fit of the official rating on the model parameter.
summary(lm(new$OFF_RATING~new$off))
## ## Call: ## lm(formula = new$OFF_RATING ~ new$off) ## ## Residuals: ## Min 1Q Median 3Q Max ## -3.6450 -1.5027 0.2735 1.0807 3.6458 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 110.07315 0.32754 336.063 < 2e-16 *** ## new$off 0.62391 0.08765 7.118 9.57e-08 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.773 on 28 degrees of freedom ## Multiple R-squared: 0.6441, Adjusted R-squared: 0.6314 ## F-statistic: 50.67 on 1 and 28 DF, p-value: 9.575e-08
# Official defensive rating (x) against the model's defensive parameter (y).
plot(new$DEF_RATING, new$def, col = new$playoff_round_reached, pch = 19)
text(new$DEF_RATING, new$def, labels = new$TEAM_NAME, pos = 3)

# Linear fit of the official defensive rating on the model's defensive
# parameter. This previously repeated the offensive regression
# (OFF_RATING ~ off), so any recorded output from that call describes offense
# and must be regenerated by re-knitting.
summary(lm(new$DEF_RATING~new$def))
## ## Call: ## lm(formula = new$OFF_RATING ~ new$off) ## ## Residuals: ## Min 1Q Median 3Q Max ## -3.6450 -1.5027 0.2735 1.0807 3.6458 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 110.07315 0.32754 336.063 < 2e-16 *** ## new$off 0.62391 0.08765 7.118 9.57e-08 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.773 on 28 degrees of freedom ## Multiple R-squared: 0.6441, Adjusted R-squared: 0.6314 ## F-statistic: 50.67 on 1 and 28 DF, p-value: 9.575e-08
#these are only estimates with uncertainty attached to them, and this #uncertainty is not visualized here. However, we might over-extrapolate to say #offensive rating overrated Portland's and Houston's offense this season #while underrating the Lakers' and Hawks' offense #more over-extrapolation, defensive rating underrated the Cavs' (still awful) #defense, and was too kind to the Hawks' defense #interestingly enough, the Bucks had the best defensive rating this regular #season, but are more middle of the pack by this metric. #you might interpret these graphs as follows: #for offense, a team to the right of another team has a better offensive rating #while a team above another team has a better offense by my parameter #for defense, a team to the left of another team has a better defensive rating #while a team above another team has a better defense by my parameter
You can also embed plots, for example: