install.packages("readr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(readr)
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library("ggplot2")
# Column classes handed to read.csv() via `colClasses`, keyed by column name.
col_types <- c(
  # Player identity
  player     = "character",
  team       = "character",
  position   = "factor",
  # Physical attributes and background
  height     = "integer",
  weight     = "integer",
  age        = "integer",
  experience = "integer",
  college    = "character",
  salary     = "double",
  # Playing time and scoring counts
  games      = "integer",
  minutes    = "integer",
  points     = "integer",
  points3    = "integer",
  points2    = "integer",
  points1    = "integer"
)
# Read nba2018-players.csv, coercing each column to the class declared in
# `col_types`, then print per-column summary statistics.
player_data2018 <- read.csv(
  file = "nba2018-players.csv",
  colClasses = col_types
)
summary(player_data2018)
## player team position height weight
## Length:477 Length:477 C : 97 Min. :69.00 Min. :150.0
## Class :character Class :character PF: 98 1st Qu.:77.00 1st Qu.:200.0
## Mode :character Mode :character PG: 96 Median :79.00 Median :220.0
## SF: 84 Mean :79.09 Mean :219.9
## SG:102 3rd Qu.:82.00 3rd Qu.:240.0
## Max. :87.00 Max. :290.0
## age experience college salary
## Min. :19.00 Min. : 0.000 Length:477 Min. : 5145
## 1st Qu.:23.00 1st Qu.: 1.000 Class :character 1st Qu.: 1050961
## Median :26.00 Median : 4.000 Mode :character Median : 3000000
## Mean :26.39 Mean : 4.662 Mean : 5804697
## 3rd Qu.:29.00 3rd Qu.: 7.000 3rd Qu.: 8269663
## Max. :40.00 Max. :18.000 Max. :30963450
## games minutes points points3
## Min. : 1.00 Min. : 1 Min. : 0.0 Min. : 0.0
## 1st Qu.:25.00 1st Qu.: 381 1st Qu.: 124.0 1st Qu.: 2.0
## Median :60.00 Median :1123 Median : 403.0 Median : 26.0
## Mean :50.71 Mean :1164 Mean : 510.3 Mean : 46.4
## 3rd Qu.:74.00 3rd Qu.:1843 3rd Qu.: 756.0 3rd Qu.: 73.0
## Max. :82.00 Max. :3048 Max. :2558.0 Max. :324.0
## points2 points1
## Min. : 0.0 Min. : 0.00
## 1st Qu.: 30.0 1st Qu.: 15.00
## Median :100.0 Median : 50.00
## Mean :142.3 Mean : 86.49
## 3rd Qu.:208.0 3rd Qu.:116.00
## Max. :730.0 Max. :746.00
# Show the class of the object returned by read.csv().
class(player_data2018)
## [1] "data.frame"
# Per-team salary summary, expressed in millions (raw salary / 1e6):
# total, mean and median salary, ordered from largest total to smallest.
team_salaries <- player_data2018 |>
  group_by(team) |>
  summarise(
    total_salary = sum(salary) / 1e6,
    mean_salary = mean(salary) / 1e6,
    median_salary = median(salary) / 1e6
  ) |>
  arrange(desc(total_salary))

print(team_salaries)
## # A tibble: 30 × 4
## team total_salary mean_salary median_salary
## <chr> <dbl> <dbl> <dbl>
## 1 CLE 127. 7.07 2.03
## 2 LAC 115. 7.65 3.5
## 3 MEM 109. 6.81 3.12
## 4 TOR 108. 7.23 5.3
## 5 SAS 105. 6.59 2.22
## 6 MIL 105. 5.51 2.57
## 7 ORL 104. 5.78 4.13
## 8 DET 103. 6.87 4.62
## 9 POR 102. 7.88 6.67
## 10 WAS 101. 6.72 3.73
## # ℹ 20 more rows
# Display the summary as a plain data.frame: unlike the tibble print, this
# lists every row and shows more digits for each value.
as.data.frame(team_salaries)
## team total_salary mean_salary median_salary
## 1 CLE 127.25458 7.069699 2.025829
## 2 LAC 114.77662 7.651775 3.500000
## 3 MEM 108.94584 6.809115 3.115470
## 4 TOR 108.45847 7.230565 5.300000
## 5 SAS 105.39553 6.587221 2.224829
## 6 MIL 104.64657 5.507714 2.568600
## 7 ORL 104.11034 5.783908 4.130580
## 8 DET 103.07449 6.871632 4.625000
## 9 POR 102.48876 7.883751 6.666667
## 10 WAS 100.78591 6.719061 3.730653
## 11 GSW 100.24256 6.265160 1.551659
## 12 NYK 97.10692 6.473794 2.898000
## 13 NOP 94.03548 5.877217 2.989125
## 14 ATL 93.40559 5.494447 2.500000
## 15 DAL 92.82830 5.157128 0.945166
## 16 IND 92.62084 5.788802 4.000000
## 17 CHI 92.50189 5.781368 2.102340
## 18 BOS 91.91509 6.127673 4.743000
## 19 CHO 88.50477 5.531548 4.024157
## 20 SAC 88.27720 5.517325 4.604441
## 21 HOU 87.39233 6.242309 2.309280
## 22 OKC 86.98136 5.798758 3.140517
## 23 LAL 85.12544 6.080389 5.307240
## 24 UTA 80.32319 5.354880 2.433334
## 25 DEN 79.02822 4.648719 3.241800
## 26 BRK 76.21567 4.011351 1.914544
## 27 PHO 73.28258 4.310740 2.223600
## 28 MIA 72.94438 5.210313 3.449000
## 29 MIN 59.87827 4.277020 3.650000
## 30 PHI 56.29336 3.311374 1.514160
# Average of the per-team total salaries (in millions), used as the reference
# line on the chart. Note this is the mean of the `total_salary` column, not
# the `mean_salary` column of `team_salaries`.
mean_salary <- mean(team_salaries$total_salary)

# Horizontal bar chart of total salary per team, with teams ordered by their
# total and a red line marking the average across teams.
ggplot(team_salaries, aes(x = reorder(team, total_salary), y = total_salary)) +
  # geom_col() plots the y values as given (same as geom_bar(stat = "identity")).
  geom_col(fill = "grey") +
  # Flip the axes so team names are listed along the vertical axis.
  coord_flip() +
  # Draw the average line. `linewidth` replaces `size`, which ggplot2 3.4.0
  # deprecated for lines and which triggered a deprecation warning here.
  geom_hline(yintercept = mean_salary, color = "red", linewidth = 0.5) +
  labs(
    title = "Total Salary by Team",
    x = "Team",
    y = "Total Salary (in Millions)"
  )
