# MLB, NBA, and NFL Player Salaries (R)

library(lattice)  # statistical graphics

# variables in contract data from spotrac.com (August 2015)
#   player: player name (contract years)
#   position: position on team  
#   team: team abbreviation 
#   teamsignedwith: team that signed the original contract  
#   age: age in years as of August 2015 
#   years:  years as player in league
#   contract: dollars in contract   
#   guaranteed: guaranteed dollars in contract  
#   guaranteedpct: percentage of contract dollars guaranteed
#   salary: annual salary in dollars
#   yearfreeagent: year player becomes free agent
# 
#   additional created variables 
#   salarymm: salary in millions
#   leaguename: full league name
#   league: league abbreviation

# Read the Major League Baseball contract data and add derived columns.
# The data directory may be overridden with the SADS_DATA_DIR environment
# variable; when it is unset, the original hard-coded location is used.
mlb_contract_data <- read.csv(file.path(
  Sys.getenv("SADS_DATA_DIR",
    unset = "/Users/jyothi/Downloads/SADS_Chapter_1"),
  "mlb_player_salaries_2015.csv"
))
# full league name, one copy per row (length.out spelled out in full
# rather than relying on partial matching of `length`)
mlb_contract_data$leaguename <- rep("Major League Baseball",
  length.out = nrow(mlb_contract_data))
# annual salary expressed in millions of dollars
mlb_contract_data$salarymm <- mlb_contract_data$salary / 1e6
# league abbreviation, one copy per row
mlb_contract_data$league <- rep("MLB", length.out = nrow(mlb_contract_data))
# print column-wise summary statistics for a quick data check
print(summary(mlb_contract_data))
##                           player       position        team    
##  Chris Young (2015-2015)     :  2   RP     :266   TEX    : 36  
##  A.J. Burnett (2015-2015)    :  1   SP     :192   NYM    : 34  
##  A.J. Ellis (2015-2015)      :  1   C      : 72   NYY    : 33  
##  A.J. Pierzynski (2015-2015) :  1   LF     : 60   PIT    : 33  
##  A.J. Pollock (2015-2015)    :  1   SS     : 56   BOS    : 32  
##  A.J. Ramos (2015-2015)      :  1   2B     : 54   COL    : 32  
##  (Other)                     :891   (Other):198   (Other):698  
##  teamsignedwith      age            years           contract        
##  TBR    : 38    Min.   :20.00   Min.   : 1.000   Min.   :   505700  
##  ATL    : 36    1st Qu.:26.00   1st Qu.: 1.000   1st Qu.:   510575  
##  ARI    : 35    Median :28.00   Median : 1.000   Median :  1150000  
##  PHI    : 35    Mean   :28.59   Mean   : 1.951   Mean   : 16297072  
##  BOS    : 34    3rd Qu.:31.00   3rd Qu.: 2.000   3rd Qu.: 11000000  
##  NYY    : 34    Max.   :42.00   Max.   :13.000   Max.   :325000000  
##  (Other):686    NA's   :6                                           
##      salary         yearfreeagent     leaguename           salarymm      
##  Min.   :  505700   Min.   :   0.0   Length:898         Min.   : 0.5057  
##  1st Qu.:  510575   1st Qu.:   0.0   Class :character   1st Qu.: 0.5106  
##  Median : 1093800   Median :   0.0   Mode  :character   Median : 1.0938  
##  Mean   : 4135897   Mean   : 788.7                      Mean   : 4.1359  
##  3rd Qu.: 5585938   3rd Qu.:2016.0                      3rd Qu.: 5.5859  
##  Max.   :31000000   Max.   :2029.0                      Max.   :31.0000  
##                                                                          
##     league         
##  Length:898        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
# keep only the two MLB columns needed for plotting:
# salary in millions and the full league name
mlb_data_plot <- mlb_contract_data[c("salarymm", "leaguename")]

# Read the National Basketball Association contract data and add derived
# columns. The data directory may be overridden with the SADS_DATA_DIR
# environment variable; when it is unset, the original hard-coded
# location is used.
nba_contract_data <- read.csv(file.path(
  Sys.getenv("SADS_DATA_DIR",
    unset = "/Users/jyothi/Downloads/SADS_Chapter_1"),
  "nba_player_salaries_2015.csv"
))
# full league name, one copy per row (length.out spelled out in full
# rather than relying on partial matching of `length`)
nba_contract_data$leaguename <- rep("National Basketball Association",
  length.out = nrow(nba_contract_data))
# annual salary expressed in millions of dollars
nba_contract_data$salarymm <- nba_contract_data$salary / 1e6
# league abbreviation, one copy per row
nba_contract_data$league <- rep("NBA", length.out = nrow(nba_contract_data))
# print column-wise summary statistics for a quick data check
print(summary(nba_contract_data))
##                          player    position       team     teamsignedwith
##  Aaron Brooks (2015-2015)   :  1   C  : 75   BOS    : 19   SAC    : 22   
##  Aaron Gordon (2014-2015)   :  1   PF :103   DAL    : 19   DAL    : 19   
##  Aaron Harrison (2015-2016) :  1   PG : 96   DET    : 18   CLE    : 18   
##  Adonis Thomas (2015-2016)  :  1   SF : 90   BKN    : 17   TOR    : 18   
##  Adreian Payne (2014-2015)  :  1   SG : 95   POR    : 17   BOS    : 17   
##  Al Horford (2011-2015)     :  1             SAS    : 17   DEN    : 17   
##  (Other)                    :453             (Other):352   (Other):348   
##       age            years          contract           guaranteed       
##  Min.   : 0.00   Min.   :1.000   Min.   :   525093   Min.   :    11937  
##  1st Qu.:23.00   1st Qu.:2.000   1st Qu.:  2521245   1st Qu.:  2363280  
##  Median :25.00   Median :3.000   Median :  6366420   Median :  6544000  
##  Mean   :25.73   Mean   :2.854   Mean   : 18316559   Mean   : 18458452  
##  3rd Qu.:29.00   3rd Qu.:4.000   3rd Qu.: 21350000   3rd Qu.: 21350000  
##  Max.   :39.00   Max.   :6.000   Max.   :145000000   Max.   :145000000  
##                                                      NA's   :18         
##  guaranteedpct        salary         yearfreeagent   leaguename       
##  Min.   :  1.39   Min.   :  428498   Min.   :   0   Length:459        
##  1st Qu.:100.00   1st Qu.: 1178610   1st Qu.:2016   Class :character  
##  Median :100.00   Median : 2873772   Median :2017   Mode  :character  
##  Mean   : 88.38   Mean   : 5107176   Mean   :2013                     
##  3rd Qu.:100.00   3rd Qu.: 6707950   3rd Qu.:2018                     
##  Max.   :100.00   Max.   :29000000   Max.   :2021                     
##  NA's   :18                                                           
##     salarymm          league         
##  Min.   : 0.4285   Length:459        
##  1st Qu.: 1.1786   Class :character  
##  Median : 2.8738   Mode  :character  
##  Mean   : 5.1072                     
##  3rd Qu.: 6.7080                     
##  Max.   :29.0000                     
## 
# keep only the two NBA columns needed for plotting:
# salary in millions and the full league name
nba_data_plot <- nba_contract_data[c("salarymm", "leaguename")]

# Read the National Football League contract data and add derived columns.
# The data directory may be overridden with the SADS_DATA_DIR environment
# variable; when it is unset, the original hard-coded location is used.
nfl_contract_data <- read.csv(file.path(
  Sys.getenv("SADS_DATA_DIR",
    unset = "/Users/jyothi/Downloads/SADS_Chapter_1"),
  "nfl_player_salaries_2015.csv"
))
# full league name, one copy per row (length.out spelled out in full
# rather than relying on partial matching of `length`)
nfl_contract_data$leaguename <- rep("National Football League",
  length.out = nrow(nfl_contract_data))
# annual salary expressed in millions of dollars
nfl_contract_data$salarymm <- nfl_contract_data$salary / 1e6
# league abbreviation, one copy per row
nfl_contract_data$league <- rep("NFL", length.out = nrow(nfl_contract_data))
# print column-wise summary statistics for a quick data check
print(summary(nfl_contract_data))
##                       player        position         team     
##  Andre Davis (20152017)  :   2   WR     : 374   NYG    :  93  
##  Mike Harris (20152015)  :   2   CB     : 307   NO     :  92  
##  Taiwan Jones (20152017) :   2   DE     : 224   PIT    :  92  
##  A.J. Bouye (20132015)   :   1   S      : 219   SEA    :  92  
##  A.J. Cann (20152018)    :   1   DT     : 214   BAL    :  91  
##  A.J. Cruz (20152017)    :   1   OLB    : 201   IND    :  91  
##  (Other)                 :2860   (Other):1330   (Other):2318  
##  teamsignedwith      age             years          contract        
##  BAL    :  96   Min.   :  0.00   Min.   :1.000   Min.   :   420000  
##  SEA    :  96   1st Qu.: 23.00   1st Qu.:2.000   1st Qu.:  1390000  
##  NO     :  95   Median : 25.00   Median :3.000   Median :  1900000  
##  SF     :  94   Mean   : 21.97   Mean   :2.947   Mean   :  6279961  
##  GB     :  93   3rd Qu.: 27.00   3rd Qu.:4.000   3rd Qu.:  3858298  
##  ATL    :  92   Max.   :975.00   Max.   :8.000   Max.   :126700000  
##  (Other):2303                                                       
##      salary           guaranteed       guaranteedpct    yearfreeagent 
##  Min.   :  420000   Min.   :     500   Min.   :  0.03   Min.   :   0  
##  1st Qu.:  525667   1st Qu.:   50000   1st Qu.:  2.50   1st Qu.:2016  
##  Median :  630000   Median :  470252   Median : 17.34   Median :2017  
##  Mean   : 1711055   Mean   : 3628381   Mean   : 24.70   Mean   :2017  
##  3rd Qu.: 1500000   3rd Qu.: 3318968   3rd Qu.: 39.48   3rd Qu.:2018  
##  Max.   :22000000   Max.   :61542000   Max.   :100.00   Max.   :2107  
##                     NA's   :866        NA's   :866                    
##   leaguename           salarymm          league         
##  Length:2869        Min.   : 0.4200   Length:2869       
##  Class :character   1st Qu.: 0.5257   Class :character  
##  Mode  :character   Median : 0.6300   Mode  :character  
##                     Mean   : 1.7111                     
##                     3rd Qu.: 1.5000                     
##                     Max.   :22.0000                     
## 
# keep only the two NFL columns needed for plotting:
# salary in millions and the full league name
nfl_data_plot <- nfl_contract_data[c("salarymm", "leaguename")]

# stack the per-league plotting subsets (MLB, then NBA, then NFL)
# into a single data frame with one row per player contract
plotting_data_frame <- do.call(
  rbind,
  list(mlb_data_plot, nba_data_plot, nfl_data_plot)
)

# build a density-scaled histogram of annual salary (in $ millions),
# one panel per league, with the three panels stacked in a single column
lattice_object <- histogram(
  ~ salarymm | leaguename,
  data = plotting_data_frame,
  type = "density",
  layout = c(1, 3),
  xlab = "Annual Salary ($ millions)"
)

# render the figure to an 8.5 x 11 inch PDF in the working directory:
# open the PDF device, draw the lattice object on it, then close the
# device so the file is flushed to disk
pdf(
  file = "fig_understanding_markets_player_salaries.pdf",
  width = 8.5,
  height = 11
)
print(lattice_object)
dev.off()
## quartz_off_screen 
##                 2