# MLB, NBA, and NFL Player Salaries (R)
library(lattice) # statistical graphics
# variables in contract data from spotrac.com (August 2015)
# player: player name (contract years)
# position: position on team
# team: team abbreviation
# teamsignedwith: team that signed the original contract
# age: age in years as of August 2015
# years: years as player in league
# contract: dollars in contract
# guaranteed: guaranteed dollars in contract
# guaranteedpct: percentage of contract dollars guaranteed
# salary: annual salary in dollars
# yearfreeagent: year player becomes free agent
#
# additional created variables
# salarymm: salary in millions
# leaguename: full league name
# league: league abbreviation
# read data for Major League Baseball
# NOTE(review): the path below is absolute and specific to one machine;
# point it at the local copy of the CSV file before running elsewhere.
# stringsAsFactors = TRUE keeps the text columns as factors, so summary()
# reports level counts as in the recorded output on every R version
# (the read.csv default became FALSE in R 4.0.0).
mlb_contract_data <- read.csv(
  "/Users/jyothi/Downloads/SADS_Chapter_1/mlb_player_salaries_2015.csv",
  stringsAsFactors = TRUE
)
# tag every row with the full league name
# (length.out is spelled out instead of relying on partial matching)
mlb_contract_data$leaguename <- rep("Major League Baseball",
  length.out = nrow(mlb_contract_data))
# annual salary expressed in millions of dollars
mlb_contract_data$salarymm <- mlb_contract_data$salary / 1000000
# tag every row with the league abbreviation
mlb_contract_data$league <- rep("MLB", length.out = nrow(mlb_contract_data))
# show per-column summaries as a quick sanity check of the loaded data
print(summary(mlb_contract_data))
## player position team
## Chris Young (2015-2015) : 2 RP :266 TEX : 36
## A.J. Burnett (2015-2015) : 1 SP :192 NYM : 34
## A.J. Ellis (2015-2015) : 1 C : 72 NYY : 33
## A.J. Pierzynski (2015-2015) : 1 LF : 60 PIT : 33
## A.J. Pollock (2015-2015) : 1 SS : 56 BOS : 32
## A.J. Ramos (2015-2015) : 1 2B : 54 COL : 32
## (Other) :891 (Other):198 (Other):698
## teamsignedwith age years contract
## TBR : 38 Min. :20.00 Min. : 1.000 Min. : 505700
## ATL : 36 1st Qu.:26.00 1st Qu.: 1.000 1st Qu.: 510575
## ARI : 35 Median :28.00 Median : 1.000 Median : 1150000
## PHI : 35 Mean :28.59 Mean : 1.951 Mean : 16297072
## BOS : 34 3rd Qu.:31.00 3rd Qu.: 2.000 3rd Qu.: 11000000
## NYY : 34 Max. :42.00 Max. :13.000 Max. :325000000
## (Other):686 NA's :6
## salary yearfreeagent leaguename salarymm
## Min. : 505700 Min. : 0.0 Length:898 Min. : 0.5057
## 1st Qu.: 510575 1st Qu.: 0.0 Class :character 1st Qu.: 0.5106
## Median : 1093800 Median : 0.0 Mode :character Median : 1.0938
## Mean : 4135897 Mean : 788.7 Mean : 4.1359
## 3rd Qu.: 5585938 3rd Qu.:2016.0 3rd Qu.: 5.5859
## Max. :31000000 Max. :2029.0 Max. :31.0000
##
## league
## Length:898
## Class :character
## Mode :character
##
##
##
##
# keep only the two columns the salary histogram needs
mlb_data_plot <- mlb_contract_data[c("salarymm", "leaguename")]
# read data for National Basketball Association
# NOTE(review): the path below is absolute and specific to one machine;
# point it at the local copy of the CSV file before running elsewhere.
# stringsAsFactors = TRUE keeps the text columns as factors, so summary()
# reports level counts as in the recorded output on every R version
# (the read.csv default became FALSE in R 4.0.0).
nba_contract_data <- read.csv(
  "/Users/jyothi/Downloads/SADS_Chapter_1/nba_player_salaries_2015.csv",
  stringsAsFactors = TRUE
)
# tag every row with the full league name
# (length.out is spelled out instead of relying on partial matching)
nba_contract_data$leaguename <- rep("National Basketball Association",
  length.out = nrow(nba_contract_data))
# annual salary expressed in millions of dollars
nba_contract_data$salarymm <- nba_contract_data$salary / 1000000
# tag every row with the league abbreviation
nba_contract_data$league <- rep("NBA", length.out = nrow(nba_contract_data))
# show per-column summaries as a quick sanity check of the loaded data
# NOTE(review): the recorded summary shows a minimum age of 0, which looks
# like a placeholder for a missing value; age is not used in the plot.
print(summary(nba_contract_data))
## player position team teamsignedwith
## Aaron Brooks (2015-2015) : 1 C : 75 BOS : 19 SAC : 22
## Aaron Gordon (2014-2015) : 1 PF :103 DAL : 19 DAL : 19
## Aaron Harrison (2015-2016) : 1 PG : 96 DET : 18 CLE : 18
## Adonis Thomas (2015-2016) : 1 SF : 90 BKN : 17 TOR : 18
## Adreian Payne (2014-2015) : 1 SG : 95 POR : 17 BOS : 17
## Al Horford (2011-2015) : 1 SAS : 17 DEN : 17
## (Other) :453 (Other):352 (Other):348
## age years contract guaranteed
## Min. : 0.00 Min. :1.000 Min. : 525093 Min. : 11937
## 1st Qu.:23.00 1st Qu.:2.000 1st Qu.: 2521245 1st Qu.: 2363280
## Median :25.00 Median :3.000 Median : 6366420 Median : 6544000
## Mean :25.73 Mean :2.854 Mean : 18316559 Mean : 18458452
## 3rd Qu.:29.00 3rd Qu.:4.000 3rd Qu.: 21350000 3rd Qu.: 21350000
## Max. :39.00 Max. :6.000 Max. :145000000 Max. :145000000
## NA's :18
## guaranteedpct salary yearfreeagent leaguename
## Min. : 1.39 Min. : 428498 Min. : 0 Length:459
## 1st Qu.:100.00 1st Qu.: 1178610 1st Qu.:2016 Class :character
## Median :100.00 Median : 2873772 Median :2017 Mode :character
## Mean : 88.38 Mean : 5107176 Mean :2013
## 3rd Qu.:100.00 3rd Qu.: 6707950 3rd Qu.:2018
## Max. :100.00 Max. :29000000 Max. :2021
## NA's :18
## salarymm league
## Min. : 0.4285 Length:459
## 1st Qu.: 1.1786 Class :character
## Median : 2.8738 Mode :character
## Mean : 5.1072
## 3rd Qu.: 6.7080
## Max. :29.0000
##
# keep only the two columns the salary histogram needs
nba_data_plot <- nba_contract_data[c("salarymm", "leaguename")]
# read data for National Football League
# NOTE(review): the path below is absolute and specific to one machine;
# point it at the local copy of the CSV file before running elsewhere.
# stringsAsFactors = TRUE keeps the text columns as factors, so summary()
# reports level counts as in the recorded output on every R version
# (the read.csv default became FALSE in R 4.0.0).
nfl_contract_data <- read.csv(
  "/Users/jyothi/Downloads/SADS_Chapter_1/nfl_player_salaries_2015.csv",
  stringsAsFactors = TRUE
)
# tag every row with the full league name
# (length.out is spelled out instead of relying on partial matching)
nfl_contract_data$leaguename <- rep("National Football League",
  length.out = nrow(nfl_contract_data))
# annual salary expressed in millions of dollars
nfl_contract_data$salarymm <- nfl_contract_data$salary / 1000000
# tag every row with the league abbreviation
nfl_contract_data$league <- rep("NFL", length.out = nrow(nfl_contract_data))
# show per-column summaries as a quick sanity check of the loaded data
# NOTE(review): the recorded summary shows ages of 0 and 975, which look
# like placeholder or data-entry errors; age is not used in the plot.
print(summary(nfl_contract_data))
## player position team
## Andre Davis (20152017) : 2 WR : 374 NYG : 93
## Mike Harris (20152015) : 2 CB : 307 NO : 92
## Taiwan Jones (20152017) : 2 DE : 224 PIT : 92
## A.J. Bouye (20132015) : 1 S : 219 SEA : 92
## A.J. Cann (20152018) : 1 DT : 214 BAL : 91
## A.J. Cruz (20152017) : 1 OLB : 201 IND : 91
## (Other) :2860 (Other):1330 (Other):2318
## teamsignedwith age years contract
## BAL : 96 Min. : 0.00 Min. :1.000 Min. : 420000
## SEA : 96 1st Qu.: 23.00 1st Qu.:2.000 1st Qu.: 1390000
## NO : 95 Median : 25.00 Median :3.000 Median : 1900000
## SF : 94 Mean : 21.97 Mean :2.947 Mean : 6279961
## GB : 93 3rd Qu.: 27.00 3rd Qu.:4.000 3rd Qu.: 3858298
## ATL : 92 Max. :975.00 Max. :8.000 Max. :126700000
## (Other):2303
## salary guaranteed guaranteedpct yearfreeagent
## Min. : 420000 Min. : 500 Min. : 0.03 Min. : 0
## 1st Qu.: 525667 1st Qu.: 50000 1st Qu.: 2.50 1st Qu.:2016
## Median : 630000 Median : 470252 Median : 17.34 Median :2017
## Mean : 1711055 Mean : 3628381 Mean : 24.70 Mean :2017
## 3rd Qu.: 1500000 3rd Qu.: 3318968 3rd Qu.: 39.48 3rd Qu.:2018
## Max. :22000000 Max. :61542000 Max. :100.00 Max. :2107
## NA's :866 NA's :866
## leaguename salarymm league
## Length:2869 Min. : 0.4200 Length:2869
## Class :character 1st Qu.: 0.5257 Class :character
## Mode :character Median : 0.6300 Mode :character
## Mean : 1.7111
## 3rd Qu.: 1.5000
## Max. :22.0000
##
# keep only the two columns the salary histogram needs
nfl_data_plot <- nfl_contract_data[c("salarymm", "leaguename")]
# stack the three per-league plotting frames into one long data frame
plotting_data_frame <- do.call(
  rbind,
  list(mlb_data_plot, nba_data_plot, nfl_data_plot)
)
# build a lattice of density-scaled salary histograms, one panel per
# league, arranged as a single column of three rows
lattice_object <- histogram(
  ~ salarymm | leaguename,
  data = plotting_data_frame,
  type = "density",
  xlab = "Annual Salary ($ millions)",
  layout = c(1, 3)
)
# render the lattice into an 8.5 x 11 PDF file, then close the device
pdf(
  file = "fig_understanding_markets_player_salaries.pdf",
  width = 8.5,
  height = 11
)
print(lattice_object)
dev.off()
## quartz_off_screen
## 2