R Markdown
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
setwd("C:/Users/munis/Documents/Comm in Data Science/Project 1/Data")
nbastats <- read_csv("nbasalariespoints.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## Player = col_character(),
## Pos = col_character(),
## Tm = col_character(),
## TEAM = col_character()
## )
## See spec(...) for full column specifications.
options(scipen = 999)
nbastats <- nbastats %>%
filter(Salary > 6600000)
nbastats
## # A tibble: 101 x 33
## Rk Player Pos Age Tm G GS MP FG FGA `FG%` `3P`
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Steph~ PG 27 GSW 79 79 34.2 10.2 20.2 0.504 5.1
## 2 2 James~ SG 26 HOU 82 82 38.1 8.7 19.7 0.439 2.9
## 3 3 Kevin~ SF 27 OKC 72 72 35.8 9.7 19.2 0.505 2.6
## 4 4 DeMar~ C 25 SAC 65 65 34.6 9.2 20.5 0.451 1.1
## 5 5 LeBro~ SF 31 CLE 76 76 35.6 9.7 18.6 0.52 1.1
## 6 7 Antho~ PF 22 NOP 61 61 35.5 9.2 18.6 0.493 0.6
## 7 8 DeMar~ SG 26 TOR 78 78 35.9 7.9 17.7 0.446 0.6
## 8 9 Russe~ PG 27 OKC 80 80 34.4 8.2 18.1 0.454 1.3
## 9 10 Paul ~ SF 25 IND 81 81 34.8 7.5 17.9 0.418 2.6
## 10 11 Isaia~ PG 26 BOS 82 79 32.2 7.2 16.9 0.428 2
## # ... with 91 more rows, and 21 more variables: `3PA` <dbl>, `3P%` <dbl>,
## # `2P` <dbl>, `2PA` <dbl>, `2P%` <dbl>, `eFG%` <dbl>, FT <dbl>,
## # FTA <dbl>, `FT%` <dbl>, ORB <dbl>, DRB <dbl>, TRB <dbl>, AST <dbl>,
## # STL <dbl>, BLK <dbl>, TOV <dbl>, PF <dbl>, Points <dbl>, RK <dbl>,
## # TEAM <chr>, Salary <dbl>
nbastats2 <- nbastats %>%
select(Rk, Player, Points, TRB, AST, Salary)
head(nbastats2)
## # A tibble: 6 x 6
## Rk Player Points TRB AST Salary
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 Stephen Curry 30.1 5.4 6.7 11370786
## 2 2 James Harden 29 6.1 7.5 15756438
## 3 3 Kevin Durant 28.2 8.2 5 20158622
## 4 4 DeMarcus Cousins 26.9 11.5 3.3 15851950
## 5 5 LeBron James 25.3 7.4 6.8 22970500
## 6 7 Anthony Davis 24.3 10.3 1.9 7070730
plot4 <- nbastats2 %>%
filter(Rk =="1" | Rk =="2" | Rk =="3" | Rk =="4" | Rk =="5") %>%
ggplot() +
geom_bar(aes(x=Player, y=Salary),
position = "dodge", stat = "identity") +
labs(title = "5 Players with Highest Points Per Game",
subtitle = "2015-2016")
plot4

plot(x=nbastats2$Salary, y=nbastats2$Rk, xlab="Yearly Salary", ylab="Points", main="NBA Salary v Points Scored", ylim = c(200,0), xlim = c(6000000, 25000000))
abline(lm(nbastats2$Rk ~ 1 + nbastats2$Salary, data=nbastats2), col="red")

rebounddata <- nbastats2 %>%
arrange(desc(TRB))
rebounddata
## # A tibble: 101 x 6
## Rk Player Points TRB AST Salary
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 83 Dwight Howard 13.7 11.8 1.4 22359364
## 2 4 DeMarcus Cousins 26.9 11.5 3.3 15851950
## 3 46 Pau Gasol 16.5 11 4.1 7448760
## 4 7 Anthony Davis 24.3 10.3 1.9 7070730
## 5 51 Kevin Love 16 9.9 2.4 19689000
## 6 85 Marcin Gortat 13.5 9.9 1.4 11217391
## 7 79 Draymond Green 14 9.5 7.4 14260870
## 8 41 Paul Millsap 17.1 9 3.3 18671659
## 9 64 Thaddeus Young 15.1 9 1.8 11235955
## 10 211 Tristan Thompson 7.8 9 0.8 14260870
## # ... with 91 more rows
rebounddata <- rebounddata %>%
mutate(rebrank = nbastats2$Rk)
rebounddata
## # A tibble: 101 x 7
## Rk Player Points TRB AST Salary rebrank
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 83 Dwight Howard 13.7 11.8 1.4 22359364 1
## 2 4 DeMarcus Cousins 26.9 11.5 3.3 15851950 2
## 3 46 Pau Gasol 16.5 11 4.1 7448760 3
## 4 7 Anthony Davis 24.3 10.3 1.9 7070730 4
## 5 51 Kevin Love 16 9.9 2.4 19689000 5
## 6 85 Marcin Gortat 13.5 9.9 1.4 11217391 7
## 7 79 Draymond Green 14 9.5 7.4 14260870 8
## 8 41 Paul Millsap 17.1 9 3.3 18671659 9
## 9 64 Thaddeus Young 15.1 9 1.8 11235955 10
## 10 211 Tristan Thompson 7.8 9 0.8 14260870 11
## # ... with 91 more rows
plot(x=rebounddata$Salary, y=rebounddata$rebrank, xlab="Yearly Salary", ylab="Rank", main="NBA Salary v Rebounds", ylim = c(200,0))
abline(lm(rebounddata$rebrank ~ 1 + rebounddata$Salary, data=rebounddata), col="blue")

assistdata <- nbastats2 %>%
arrange(desc(AST))
assistdata
## # A tibble: 101 x 6
## Rk Player Points TRB AST Salary
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 112 Rajon Rondo 11.9 6 11.7 9500000
## 2 9 Russell Westbrook 23.5 7.8 10.4 16744218
## 3 23 John Wall 19.9 4.9 10.2 15851950
## 4 28 Chris Paul 19.5 4.2 10 21468695
## 5 148 Ricky Rubio 10.1 4.3 8.7 12700000
## 6 2 James Harden 29 6.1 7.5 15756438
## 7 79 Draymond Green 14 9.5 7.4 14260870
## 8 5 LeBron James 25.3 7.4 6.8 22970500
## 9 1 Stephen Curry 30.1 5.4 6.7 11370786
## 10 61 Tyreke Evans 15.2 5.2 6.6 10734586
## # ... with 91 more rows
assistdata <- assistdata %>%
mutate(astrank = nbastats2$Rk)
assistdata
## # A tibble: 101 x 7
## Rk Player Points TRB AST Salary astrank
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 112 Rajon Rondo 11.9 6 11.7 9500000 1
## 2 9 Russell Westbrook 23.5 7.8 10.4 16744218 2
## 3 23 John Wall 19.9 4.9 10.2 15851950 3
## 4 28 Chris Paul 19.5 4.2 10 21468695 4
## 5 148 Ricky Rubio 10.1 4.3 8.7 12700000 5
## 6 2 James Harden 29 6.1 7.5 15756438 7
## 7 79 Draymond Green 14 9.5 7.4 14260870 8
## 8 5 LeBron James 25.3 7.4 6.8 22970500 9
## 9 1 Stephen Curry 30.1 5.4 6.7 11370786 10
## 10 61 Tyreke Evans 15.2 5.2 6.6 10734586 11
## # ... with 91 more rows
plot(x=assistdata$Salary, y=assistdata$astrank, xlab="Yearly Salary", ylab="NBA Rank", main="NBA Salary v Assists", ylim = c(200,0))
abline(lm(assistdata$astrank ~ 1 + assistdata$Salary, data=assistdata), col="green")

plot(x=assistdata$Salary, y=assistdata$astrank, xlab="Salary", ylab="NBA Rank", main="NBA Salary v Stats", ylim = c(200,0), col="green")
abline(lm(assistdata$astrank ~ 1 + assistdata$Salary, data=assistdata), col="green")
par(new = TRUE)
plot(x=rebounddata$Salary, y=rebounddata$rebrank, xlab="Salary", ylab="", ylim = c(200,0), col="blue")
abline(lm(rebounddata$rebrank ~ 1 + rebounddata$Salary, data=rebounddata), col="blue")
par(new=TRUE)
plot(x=nbastats2$Salary, y=nbastats2$Rk, xlab="Salary", ylab="", ylim = c(200,0), col="red")
abline(lm(nbastats2$Rk ~ 1 + nbastats2$Salary, data=nbastats2), col="red")
legend(x=20000000, y=150, legend=c("Points", "Rebounds", "Assists"),
col=c("red", "blue", "green"), pch=c(8,16))
