# Install only the packages that are not already available, so the script
# does not reach for the network and reinstall everything on every run.
# Wrapped in local() so the helper variables do not leak into the workspace.
local({
  required <- c("readr", "dplyr", "ggplot2")
  available <- vapply(required, requireNamespace, logical(1), quietly = TRUE)
  if (!all(available)) {
    install.packages(required[!available])
  }
})

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Column classes for the import, keyed by column name: each name maps to the
# R class that column is read as.
col_types <- c(
  # text columns
  player = "character",
  team = "character",
  # categorical column
  position = "factor",
  # whole-number columns
  height = "integer",
  weight = "integer",
  age = "integer",
  experience = "integer",
  # text column
  college = "character",
  # salary is read as double rather than integer
  salary = "double",
  # whole-number columns
  games = "integer",
  minutes = "integer",
  points = "integer",
  points3 = "integer",
  points2 = "integer",
  points1 = "integer"
)

# Load the player table with base read.csv(), forcing each column to the
# class listed in col_types.
player_data2018 <- read.csv(
  file = "nba2018-players.csv",
  colClasses = col_types
)




# Show a per-column summary of the imported data: level counts for the factor
# column, min/quartiles/mean/max for the numeric columns.
summary(player_data2018)
##     player              team           position     height          weight     
##  Length:477         Length:477         C : 97   Min.   :69.00   Min.   :150.0  
##  Class :character   Class :character   PF: 98   1st Qu.:77.00   1st Qu.:200.0  
##  Mode  :character   Mode  :character   PG: 96   Median :79.00   Median :220.0  
##                                        SF: 84   Mean   :79.09   Mean   :219.9  
##                                        SG:102   3rd Qu.:82.00   3rd Qu.:240.0  
##                                                 Max.   :87.00   Max.   :290.0  
##       age          experience       college              salary        
##  Min.   :19.00   Min.   : 0.000   Length:477         Min.   :    5145  
##  1st Qu.:23.00   1st Qu.: 1.000   Class :character   1st Qu.: 1050961  
##  Median :26.00   Median : 4.000   Mode  :character   Median : 3000000  
##  Mean   :26.39   Mean   : 4.662                      Mean   : 5804697  
##  3rd Qu.:29.00   3rd Qu.: 7.000                      3rd Qu.: 8269663  
##  Max.   :40.00   Max.   :18.000                      Max.   :30963450  
##      games          minutes         points          points3     
##  Min.   : 1.00   Min.   :   1   Min.   :   0.0   Min.   :  0.0  
##  1st Qu.:25.00   1st Qu.: 381   1st Qu.: 124.0   1st Qu.:  2.0  
##  Median :60.00   Median :1123   Median : 403.0   Median : 26.0  
##  Mean   :50.71   Mean   :1164   Mean   : 510.3   Mean   : 46.4  
##  3rd Qu.:74.00   3rd Qu.:1843   3rd Qu.: 756.0   3rd Qu.: 73.0  
##  Max.   :82.00   Max.   :3048   Max.   :2558.0   Max.   :324.0  
##     points2         points1      
##  Min.   :  0.0   Min.   :  0.00  
##  1st Qu.: 30.0   1st Qu.: 15.00  
##  Median :100.0   Median : 50.00  
##  Mean   :142.3   Mean   : 86.49  
##  3rd Qu.:208.0   3rd Qu.:116.00  
##  Max.   :730.0   Max.   :746.00
# Report the class of the imported object; read.csv() returns a base
# data.frame rather than a tibble.
class(player_data2018)
## [1] "data.frame"
# Per-team salary statistics, each divided by 1e6 (i.e. in millions), with
# teams ordered from the largest total salary to the smallest.
team_salaries <- player_data2018 |>
  group_by(team) |>
  summarise(
    total_salary = sum(salary) / 1e6,
    mean_salary = mean(salary) / 1e6,
    median_salary = median(salary) / 1e6
  ) |>
  arrange(desc(total_salary))


# Print the summary tibble; the tibble print method shows only the first 10
# rows and notes how many more there are.
print(team_salaries)
## # A tibble: 30 × 4
##    team  total_salary mean_salary median_salary
##    <chr>        <dbl>       <dbl>         <dbl>
##  1 CLE           127.        7.07          2.03
##  2 LAC           115.        7.65          3.5 
##  3 MEM           109.        6.81          3.12
##  4 TOR           108.        7.23          5.3 
##  5 SAS           105.        6.59          2.22
##  6 MIL           105.        5.51          2.57
##  7 ORL           104.        5.78          4.13
##  8 DET           103.        6.87          4.62
##  9 POR           102.        7.88          6.67
## 10 WAS           101.        6.72          3.73
## # ℹ 20 more rows
# Convert to a base data.frame so the printout lists every team instead of
# the truncated tibble preview. The result is not assigned, so team_salaries
# itself remains a tibble.
as.data.frame(team_salaries)
##    team total_salary mean_salary median_salary
## 1   CLE    127.25458    7.069699      2.025829
## 2   LAC    114.77662    7.651775      3.500000
## 3   MEM    108.94584    6.809115      3.115470
## 4   TOR    108.45847    7.230565      5.300000
## 5   SAS    105.39553    6.587221      2.224829
## 6   MIL    104.64657    5.507714      2.568600
## 7   ORL    104.11034    5.783908      4.130580
## 8   DET    103.07449    6.871632      4.625000
## 9   POR    102.48876    7.883751      6.666667
## 10  WAS    100.78591    6.719061      3.730653
## 11  GSW    100.24256    6.265160      1.551659
## 12  NYK     97.10692    6.473794      2.898000
## 13  NOP     94.03548    5.877217      2.989125
## 14  ATL     93.40559    5.494447      2.500000
## 15  DAL     92.82830    5.157128      0.945166
## 16  IND     92.62084    5.788802      4.000000
## 17  CHI     92.50189    5.781368      2.102340
## 18  BOS     91.91509    6.127673      4.743000
## 19  CHO     88.50477    5.531548      4.024157
## 20  SAC     88.27720    5.517325      4.604441
## 21  HOU     87.39233    6.242309      2.309280
## 22  OKC     86.98136    5.798758      3.140517
## 23  LAL     85.12544    6.080389      5.307240
## 24  UTA     80.32319    5.354880      2.433334
## 25  DEN     79.02822    4.648719      3.241800
## 26  BRK     76.21567    4.011351      1.914544
## 27  PHO     73.28258    4.310740      2.223600
## 28  MIA     72.94438    5.210313      3.449000
## 29  MIN     59.87827    4.277020      3.650000
## 30  PHI     56.29336    3.311374      1.514160
# Average of the per-team total salaries (in millions); used as the reference
# line in the chart below.
mean_salary <- mean(team_salaries$total_salary)

# Horizontal bar chart of total salary per team, with bars ordered by total
# salary and a red line marking the average across teams.
ggplot(
  team_salaries,
  aes(x = reorder(team, total_salary), y = total_salary)
) +
  # geom_col() draws bar heights straight from the data, which is what
  # geom_bar(stat = "identity") did.
  geom_col(fill = "grey") +
  # Flip the axes so team names run down the side of the chart.
  coord_flip() +
  # Draw the average as a reference line. Line thickness is set with
  # `linewidth`; using `size` for lines was deprecated in ggplot2 3.4.0.
  geom_hline(yintercept = mean_salary, color = "red", linewidth = 0.5) +
  labs(
    title = "Total Salary by Team",
    x = "Team",
    y = "Total Salary (in Millions)"
  )