# Attach the packages this script relies on: dplyr supplies distinct(),
# mutate() and filter(); ggplot2 supplies all of the plotting functions.
library(dplyr)
library(ggplot2)

# Load data (both CSV files are read relative to the working directory)
players <- read.csv("players.csv")
salaries <- read.csv("salaries_1985to2018.csv")

# Eliminate repeat player names, keeping the first row found for each name.
# NOTE(review): this also collapses different players who share a name;
# confirm whether de-duplicating on the player id would be more appropriate.
players_unique <- distinct(players, name, .keep_all = TRUE)

# Create a common key so the two data sets can be merged: the first column of
# players is renamed to "player_id", the key column used by salaries.
stopifnot("player_id" %in% colnames(salaries))
colnames(players_unique)[1] <- "player_id"
full_data <- merge(players_unique, salaries, by = "player_id")

# Check that the data sets were merged correctly
str(full_data)
## 'data.frame': 14024 obs. of 26 variables:
## $ player_id : chr "abdelal01" "abdelal01" "abdelal01" "abdelal01" ...
## $ birthDate : chr "24-Jun-68" "24-Jun-68" "24-Jun-68" "24-Jun-68" ...
## $ birthPlace : chr "Cairo, Egypt" "Cairo, Egypt" "Cairo, Egypt" "Cairo, Egypt" ...
## $ career_AST : num 0.3 0.3 0.3 0.3 0.3 3.6 3.6 3.6 3.6 3.5 ...
## $ career_FG. : chr "50.2" "50.2" "50.2" "50.2" ...
## $ career_FG3. : chr "0" "0" "0" "0" ...
## $ career_FT. : chr "70.1" "70.1" "70.1" "70.1" ...
## $ career_G : int 256 256 256 256 256 1560 1560 1560 1560 586 ...
## $ career_PER : chr "13" "13" "13" "13" ...
## $ career_PTS : num 5.7 5.7 5.7 5.7 5.7 24.6 24.6 24.6 24.6 14.6 ...
## $ career_TRB : chr "3.3" "3.3" "3.3" "3.3" ...
## $ career_WS : chr "4.8" "4.8" "4.8" "4.8" ...
## $ career_eFG. : chr "50.2" "50.2" "50.2" "50.2" ...
## $ draft_pick : chr "25th overall" "25th overall" "25th overall" "25th overall" ...
## $ draft_round : chr "1st round" "1st round" "1st round" "1st round" ...
## $ draft_team : chr "Portland Trail Blazers" "Portland Trail Blazers" "Portland Trail Blazers" "Portland Trail Blazers" ...
## $ draft_year : chr "1990" "1990" "1990" "1990" ...
## $ name : chr "Alaa Abdelnaby" "Alaa Abdelnaby" "Alaa Abdelnaby" "Alaa Abdelnaby" ...
## $ position : chr "Power Forward" "Power Forward" "Power Forward" "Power Forward" ...
## $ shoots : chr "Right" "Right" "Right" "Right" ...
## $ league : chr "NBA" "NBA" "NBA" "NBA" ...
## $ salary : int 500000 805000 494000 650000 395000 2000000 3000000 2030000 1530000 2008000 ...
## $ season : chr "1992-93" "1993-94" "1991-92" "1994-95" ...
## $ season_end : int 1993 1994 1992 1995 1991 1988 1989 1986 1985 1992 ...
## $ season_start: int 1992 1993 1991 1994 1990 1987 1988 1985 1984 1991 ...
## $ team : chr "Boston Celtics" "Boston Celtics" "Portland Trail Blazers" "Sacramento Kings" ...
# Histogram of raw salaries. The bin count is stated explicitly (30 is the
# value stat_bin() falls back to), so the plot is unchanged but ggplot2 no
# longer emits its "Pick better value with `binwidth`" message.
ggplot(full_data, aes(x = salary)) +
  geom_histogram(bins = 30)

# Express salary in millions of dollars so the histogram's x-axis is easier
# to read.
full_data$salary_millions <- full_data$salary / 1e6
# Customized histogram of salaries in millions. The bin count is given
# explicitly (30, the stat_bin() fallback) so the plot is unchanged while the
# "Pick better value with `binwidth`" message is no longer emitted.
ggplot(full_data, aes(x = salary_millions)) +
  geom_histogram(bins = 30, fill = "blue", color = "black") +
  labs(
    title = "Frequency of NBA salary ranges since 1985",
    x = "Salary (Millions)",
    y = "Frequency"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Flag every row whose season ended before 1998 (TRUE) or in 1998 and later
# (FALSE).
full_data$before_1998 <- full_data$season_end < 1998
# Check that before_1998 was derived correctly
str(full_data$before_1998)
## logi [1:14024] TRUE TRUE TRUE TRUE TRUE TRUE ...
# Spot-check up to 10 random rows: season_end and before_1998 are printed for
# the same row indices so they can be compared side by side. sample.int()
# avoids the 1:nrow() pitfall, and the size is capped at the number of rows.
# The draw is random, so the values shown below are one example run.
test <- sample.int(nrow(full_data), min(10L, nrow(full_data)))
full_data$season_end[test]
## [1] 1988 2016 2009 2018 1985 1998 2007 2009 2014 2017
full_data$before_1998[test]
## [1] TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
# Look for linear relationships between salary and statistics that indicate
# player effectiveness. Points are colored by whether the season ended before
# 1998.
ggplot(
  full_data,
  aes(x = salary_millions, y = career_PTS, color = before_1998)
) +
  geom_point(size = 0.5)

ggplot(
  full_data,
  aes(x = salary_millions, y = career_AST, color = before_1998)
) +
  geom_point(size = 0.5)

ggplot(
  full_data,
  aes(x = salary_millions, y = career_G, color = before_1998)
) +
  geom_point(size = 0.5)

# career_PER is read in as a character column (see the str() output), which
# would give a discrete, alphabetically ordered y-axis. Convert it to numeric
# so the axis is continuous; entries that are not numbers become NA and are
# dropped from the plot by ggplot2.
ggplot(
  full_data,
  aes(x = salary_millions, y = as.numeric(career_PER), color = before_1998)
) +
  geom_point(size = 0.5) +
  ylab("career_PER")

# No linear relationship between salary and the major stats was apparent, so
# look instead at how the average salary, and its standard deviation, change
# over time. First: mean salary (in millions) per season-end year.
meansby_year <- aggregate(
  salary_millions ~ season_end,
  data = full_data,
  FUN = mean
)
meansby_year
## season_end salary_millions
## 1 1985 0.3996364
## 2 1986 0.3726309
## 3 1987 0.5430333
## 4 1988 0.4579613
## 5 1989 0.5286683
## 6 1990 1.6709375
## 7 1991 0.8311449
## 8 1992 0.9533906
## 9 1993 1.0579855
## 10 1994 1.2654576
## 11 1995 1.3576655
## 12 1996 1.7417068
## 13 1997 1.9377223
## 14 1998 2.1307594
## 15 1999 2.4583953
## 16 2000 2.5030532
## 17 2001 3.2225792
## 18 2002 3.3875299
## 19 2003 3.6380194
## 20 2004 3.6516106
## 21 2005 3.7099994
## 22 2006 3.8541600
## 23 2007 3.8156623
## 24 2008 4.2761453
## 25 2009 4.5974022
## 26 2010 4.4649377
## 27 2011 4.3200063
## 28 2012 4.3005636
## 29 2013 4.2519810
## 30 2014 4.8743350
## 31 2015 4.0352025
## 32 2016 4.4363571
## 33 2017 5.3021175
## 34 2018 6.0390105
# Line chart of the mean salary (millions) against season-end year.
ggplot(
  data = meansby_year,
  mapping = aes(x = season_end, y = salary_millions)
) +
  geom_line()

# Standard deviation of salary (in millions) per season-end year.
sdby_year <- aggregate(
  salary_millions ~ season_end,
  data = full_data,
  FUN = sd
)
sdby_year
## season_end salary_millions
## 1 1985 0.3442535
## 2 1986 0.3422999
## 3 1987 0.3851189
## 4 1988 0.4113069
## 5 1989 0.4735973
## 6 1990 0.6464278
## 7 1991 0.6609400
## 8 1992 0.8008670
## 9 1993 0.8752406
## 10 1994 0.9759089
## 11 1995 1.1573435
## 12 1996 1.6786839
## 13 1997 2.4732533
## 14 1998 2.7707181
## 15 1999 2.6781868
## 16 2000 3.0487491
## 17 2001 3.4590165
## 18 2002 3.6827405
## 19 2003 4.0373344
## 20 2004 4.0127969
## 21 2005 3.9621434
## 22 2006 4.0244066
## 23 2007 4.1718258
## 24 2008 4.4784066
## 25 2009 4.7330220
## 26 2010 4.6480387
## 27 2011 4.5609979
## 28 2012 4.6120690
## 29 2013 4.6319243
## 30 2014 4.8714458
## 31 2015 4.6649256
## 32 2016 5.1071934
## 33 2017 6.3015544
## 34 2018 7.2451318
# Line chart of the salary standard deviation (millions) against season-end
# year.
ggplot(
  data = sdby_year,
  mapping = aes(x = season_end, y = salary_millions)
) +
  geom_line()

# Isolate rows by position. The data contains many combined positions, so
# only the five main positions are kept, which drops a number of entries.
# The positions are named explicitly instead of being picked by index out of
# the sorted factor levels: index positions silently select the wrong levels
# if the set of position labels ever changes. (The previous index vector was
# also called `save`, which masks base::save.)
main_positions <- c(
  "Center",
  "Point Guard",
  "Power Forward",
  "Shooting Guard",
  "Small Forward"
)
filtered_positions <- filter(full_data, position %in% main_positions)
# Salary histograms, one panel per main position. The bin count is given
# explicitly (30, the stat_bin() fallback) so the panels are unchanged while
# the "Pick better value with `binwidth`" message is no longer emitted.
ggplot(filtered_positions, aes(x = salary_millions)) +
  geom_histogram(bins = 30) +
  facet_wrap(~position)

# Mean salary (in millions) for every combination of season-end year and
# main position.
meanby_year <- aggregate(
  salary_millions ~ season_end + position,
  data = filtered_positions,
  FUN = mean
)
meanby_year
## season_end position salary_millions
## 1 1985 Center 0.4603939
## 2 1986 Center 0.4516957
## 3 1987 Center 0.6646667
## 4 1988 Center 0.5871073
## 5 1989 Center 0.6123197
## 6 1990 Center 1.6476923
## 7 1991 Center 0.9664231
## 8 1992 Center 1.0078065
## 9 1993 Center 1.3152857
## 10 1994 Center 1.3885156
## 11 1995 Center 1.4505500
## 12 1996 Center 1.7947500
## 13 1997 Center 1.8908047
## 14 1998 Center 2.2234496
## 15 1999 Center 3.0327343
## 16 2000 Center 2.7812897
## 17 2001 Center 3.5191462
## 18 2002 Center 3.2847491
## 19 2003 Center 3.7713133
## 20 2004 Center 3.4378641
## 21 2005 Center 3.4059746
## 22 2006 Center 3.1651377
## 23 2007 Center 3.2001369
## 24 2008 Center 3.7015472
## 25 2009 Center 4.1490338
## 26 2010 Center 4.7592147
## 27 2011 Center 3.4878533
## 28 2012 Center 3.4997587
## 29 2013 Center 4.3661598
## 30 2014 Center 4.3001805
## 31 2015 Center 3.7612817
## 32 2016 Center 5.1018702
## 33 2017 Center 6.4249237
## 34 2018 Center 7.0070884
## 35 1985 Point Guard 0.2768421
## 36 1986 Point Guard 0.2354063
## 37 1987 Point Guard 0.4031667
## 38 1988 Point Guard 0.2944671
## 39 1989 Point Guard 0.3395366
## 40 1990 Point Guard 1.5883333
## 41 1991 Point Guard 0.6622549
## 42 1992 Point Guard 0.8094737
## 43 1993 Point Guard 0.8405806
## 44 1994 Point Guard 1.1194912
## 45 1995 Point Guard 1.1480222
## 46 1996 Point Guard 1.4888627
## 47 1997 Point Guard 1.7118018
## 48 1998 Point Guard 1.7379784
## 49 1999 Point Guard 2.0886203
## 50 2000 Point Guard 2.2179489
## 51 2001 Point Guard 2.6594750
## 52 2002 Point Guard 3.0411538
## 53 2003 Point Guard 3.0327076
## 54 2004 Point Guard 2.9635167
## 55 2005 Point Guard 2.8950876
## 56 2006 Point Guard 3.2231790
## 57 2007 Point Guard 3.7989193
## 58 2008 Point Guard 4.0871232
## 59 2009 Point Guard 3.9015430
## 60 2010 Point Guard 3.8515298
## 61 2011 Point Guard 4.0007976
## 62 2012 Point Guard 4.0619049
## 63 2013 Point Guard 4.7205477
## 64 2014 Point Guard 5.1076943
## 65 2015 Point Guard 4.2291127
## 66 2016 Point Guard 4.6036304
## 67 2017 Point Guard 5.4093337
## 68 2018 Point Guard 6.0169971
## 69 1985 Power Forward 0.3569048
## 70 1986 Power Forward 0.3595860
## 71 1987 Power Forward 0.3287777
## 72 1988 Power Forward 0.4268141
## 73 1989 Power Forward 0.5130000
## 74 1990 Power Forward 1.6714286
## 75 1991 Power Forward 0.7620270
## 76 1992 Power Forward 1.0132059
## 77 1993 Power Forward 0.9821842
## 78 1994 Power Forward 1.0785128
## 79 1995 Power Forward 0.9214782
## 80 1996 Power Forward 1.2944333
## 81 1997 Power Forward 1.3955000
## 82 1998 Power Forward 1.2192458
## 83 1999 Power Forward 1.6157814
## 84 2000 Power Forward 1.6376176
## 85 2001 Power Forward 2.5101811
## 86 2002 Power Forward 2.7690089
## 87 2003 Power Forward 2.9925673
## 88 2004 Power Forward 2.7024661
## 89 2005 Power Forward 2.8587976
## 90 2006 Power Forward 2.0455323
## 91 2007 Power Forward 1.7151865
## 92 2008 Power Forward 2.3401987
## 93 2009 Power Forward 2.5676874
## 94 2010 Power Forward 2.7855118
## 95 2011 Power Forward 2.9561931
## 96 2012 Power Forward 2.5251658
## 97 2013 Power Forward 2.8910066
## 98 2014 Power Forward 4.2471591
## 99 2015 Power Forward 3.1015253
## 100 2016 Power Forward 3.3158407
## 101 2017 Power Forward 3.2514119
## 102 2018 Power Forward 4.0941274
## 103 1985 Shooting Guard 0.2689000
## 104 1986 Shooting Guard 0.2539547
## 105 1987 Shooting Guard 0.4475000
## 106 1988 Shooting Guard 0.2680745
## 107 1989 Shooting Guard 0.4142946
## 108 1990 Shooting Guard 1.1500000
## 109 1991 Shooting Guard 0.6238387
## 110 1992 Shooting Guard 0.6768636
## 111 1993 Shooting Guard 0.8003223
## 112 1994 Shooting Guard 1.0186154
## 113 1995 Shooting Guard 1.0196798
## 114 1996 Shooting Guard 1.5849091
## 115 1997 Shooting Guard 1.8515333
## 116 1998 Shooting Guard 1.8515504
## 117 1999 Shooting Guard 1.9789582
## 118 2000 Shooting Guard 2.5775880
## 119 2001 Shooting Guard 3.8955180
## 120 2002 Shooting Guard 3.3387479
## 121 2003 Shooting Guard 3.3631814
## 122 2004 Shooting Guard 2.8127214
## 123 2005 Shooting Guard 3.7397592
## 124 2006 Shooting Guard 2.9073560
## 125 2007 Shooting Guard 1.8503145
## 126 2008 Shooting Guard 2.4853887
## 127 2009 Shooting Guard 2.9446719
## 128 2010 Shooting Guard 3.6568460
## 129 2011 Shooting Guard 3.5795574
## 130 2012 Shooting Guard 3.7609707
## 131 2013 Shooting Guard 3.6306395
## 132 2014 Shooting Guard 3.4510088
## 133 2015 Shooting Guard 2.6285946
## 134 2016 Shooting Guard 2.6612470
## 135 2017 Shooting Guard 3.4193254
## 136 2018 Shooting Guard 4.1250403
## 137 1985 Small Forward 0.4662000
## 138 1986 Small Forward 0.3865127
## 139 1987 Small Forward 0.6390000
## 140 1988 Small Forward 0.4773122
## 141 1989 Small Forward 0.4590226
## 142 1990 Small Forward 1.6833333
## 143 1991 Small Forward 0.7045897
## 144 1992 Small Forward 0.8555128
## 145 1993 Small Forward 0.8932391
## 146 1994 Small Forward 1.1966389
## 147 1995 Small Forward 1.1552559
## 148 1996 Small Forward 1.3084750
## 149 1997 Small Forward 1.3678927
## 150 1998 Small Forward 1.6620889
## 151 1999 Small Forward 2.1975111
## 152 2000 Small Forward 1.9741302
## 153 2001 Small Forward 2.5707266
## 154 2002 Small Forward 3.2910366
## 155 2003 Small Forward 3.7287049
## 156 2004 Small Forward 3.2809051
## 157 2005 Small Forward 4.1072986
## 158 2006 Small Forward 3.8831909
## 159 2007 Small Forward 3.9460484
## 160 2008 Small Forward 4.6905846
## 161 2009 Small Forward 4.5407749
## 162 2010 Small Forward 4.0814645
## 163 2011 Small Forward 3.4176058
## 164 2012 Small Forward 3.0355831
## 165 2013 Small Forward 2.4039074
## 166 2014 Small Forward 3.3349018
## 167 2015 Small Forward 2.3306651
## 168 2016 Small Forward 2.4412495
## 169 2017 Small Forward 3.2162949
## 170 2018 Small Forward 4.0968207
# Final plot: mean salary per year, drawn as one colored line per position
# with each position in its own panel. All theme overrides are collected in a
# single theme() call on top of theme_dark().
ggplot(
  data = meanby_year,
  mapping = aes(x = season_end, y = salary_millions, color = position)
) +
  geom_line() +
  facet_wrap(~position) +
  labs(
    title = "NBA Salaries by Position since 1985",
    x = "Year",
    y = "Salary (Millions)"
  ) +
  theme_dark() +
  theme(
    plot.background = element_rect(fill = "light blue"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 10),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16)
  )
