NBA DATASET

This is an analysis of NBA data from 1978 to 2016, in which a range of variables and statistics are recorded over time:

# Switch to the folder that holds the data and load the season table.
# NOTE(review): the absolute path is machine-specific; adjust it before
# running this on another computer.
setwd("C:/Users/hlo/Desktop/data_set")
basketball_data <- read.csv("NBA Season Data.csv")

# List every variable (column) available in the data set.
names(basketball_data)
##   [1] "Year"                "Tm"                  "Player"             
##   [4] "Age"                 "G"                   "MP"                 
##   [7] "PER"                 "TS."                 "X3PAr"              
##  [10] "FTr"                 "ORB."                "DRB."               
##  [13] "TRB."                "AST."                "STL."               
##  [16] "BLK."                "TOV."                "USG."               
##  [19] "."                   "OWS"                 "DWS"                
##  [22] "WS"                  "WS.48"               "..1"                
##  [25] "OBPM"                "DBPM"                "BPM"                
##  [28] "VORP"                "OWS.48"              "DWS.48"             
##  [31] "Shot."               "Player.ID"           "Team.Mar"           
##  [34] "Team.MP"             "Team.Gm"             "Year.3PAr"          
##  [37] "Team.TS."            "Tm.USG"              "Tm.TS.W.O.Plyr"     
##  [40] "Reb.Vers"            "Defense"             "Val.Shot"           
##  [43] "Offense"             "MPG...Int"           "RAW.SPM"            
##  [46] "X.Min"               "RAW.Contrib"         "Tm.Sum"             
##  [49] "Tm.Adj"              "BPM.1"               "StdErr"             
##  [52] "Contrib"             "VORP.1"              "Reb.Vers.1"         
##  [55] "Val.Shot.1"          "Offense.1"           "Defense.1"          
##  [58] "MPG"                 "Raw.OBPM"            "Contrib.1"          
##  [61] "Tm.Ortg"             "Tm.Sum.1"            "Tm.Adj.1"           
##  [64] "Tm.DRtg"             "OBPM.1"              "OStdErr"            
##  [67] "Ocontrib"            "OVORP"               "DBPM.1"             
##  [70] "DStdErr"             "Dcontrib"            "DVORP"              
##  [73] "Sum.SPM"             "X.Min.1"             "MPG.1"              
##  [76] "ReMPG"               "BPM.2"               "Contrib.2"          
##  [79] "VORP.2"              "VORP.Gm"             "O.BPM"              
##  [82] "Ocontrib.1"          "OVORP.1"             "OVORP.Gm"           
##  [85] "D.BPM"               "Dcontrib.1"          "DVORP.1"            
##  [88] "DVORP.Gm"            "Production"          "Prod.Gm"            
##  [91] "Exp.BPM"             "TrueTalentBPM"       "Exp.Min"            
##  [94] "TrueTalentVORP"      "TrueSalary"          "TrueTimeVORP"       
##  [97] "Adjusted.Production" "WORP"                "O.WORP"             
## [100] "D.WORP"              "Adjusted.WORP"       "Estimated.Position" 
## [103] "BBRef.Pos"           "Age.on.Feb.1"        "Yrs.Experience"     
## [106] "Height"              "Weight"              "Rounded.Position"   
## [109] "Rounded.Age"
# Minimum, quartiles, mean and maximum of the Age column.
# NOTE(review): the reported minimum of 0 looks like a placeholder or
# missing value rather than a real age -- verify against the raw data.
summary(basketball_data[["Age"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   23.00   26.00   26.09   29.00   44.00
# Clean TrueSalary: strip the dollar signs and comma separators, convert
# the remaining text to numeric, then inspect the resulting column.
basketball_data$TrueSalary <- as.numeric(
  gsub("\\$|\\,", "", basketball_data$TrueSalary)
)
str(basketball_data$TrueSalary)
##  num [1:17729] 8800000 14800000 14700000 20600000 18500000 11900000 19700000 18500000 17000000 10700000 ...
# Put the columns of basketball_data on the search path so they can be
# referenced by bare name.
# NOTE(review): attach() works on a snapshot of the data frame, so later
# changes to basketball_data are not visible through the attached names.
attach(basketball_data)
# Load plotly for interactive graphics (this also loads ggplot2).
library(plotly)
## Warning: package 'plotly' was built under R version 3.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Density curve of TrueSalary: draw it with ggplot2 as a line, then hand
# the result to ggplotly() to get an interactive plotly version.
p <- ggplotly(
  ggplot(basketball_data, aes(x = TrueSalary)) +
    stat_density(geom = "line")
)
## Warning: We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 5439 rows containing non-finite values (stat_density).
# Auto-print p to render the interactive density plot.
p

Correlation Plots

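The output below shows the first rows of the rebounding, assist, steal and block percentage columns, followed by their correlation matrix: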

##   ORB. DRB. TRB. AST. STL. BLK.
## 1  8.0 25.8 17.0  6.8  0.9  1.1
## 2  3.6  6.4  5.0 22.2  2.4  0.2
## 3 10.2 22.7 16.4  6.5  1.3  2.4
## 4  8.0 25.6 17.0 15.7  1.4  1.1
## 5  7.8 24.7 16.4 13.4  1.4  2.0
## 6  7.9 16.0 11.8  9.9  1.7  0.6
## Warning: package 'corrplot' was built under R version 3.3.2
##             ORB.       DRB.       TRB.       AST.        STL.       BLK.
## ORB.  1.00000000  0.5044349  0.8187350 -0.3636877 -0.07104839  0.3354940
## DRB.  0.50443487  1.0000000  0.9038240 -0.2916031 -0.10230481  0.4255017
## TRB.  0.81873500  0.9038240  1.0000000 -0.3666336 -0.10254574  0.4395971
## AST. -0.36368766 -0.2916031 -0.3666336  1.0000000  0.32763746 -0.2823699
## STL. -0.07104839 -0.1023048 -0.1025457  0.3276375  1.00000000 -0.1123384
## BLK.  0.33549398  0.4255017  0.4395971 -0.2823699 -0.11233838  1.0000000

## Warning: package 'qtlcharts' was built under R version 3.3.3
## Set screen size to height=700 x width=1000