This is an analysis of NBA data from 1978 to 2016, in which different variables and statistics are recorded over time:
# Switch the session to the folder holding the data set, then read the season
# data into a data frame.
# NOTE(review): the absolute path ties this script to one machine; consider a
# project-relative path once it is confirmed nothing else relies on setwd().
setwd("C:/Users/hlo/Desktop/data_set")
basketball_data <- read.csv("NBA Season Data.csv")
# List every variable (column) available in the data set.
names(basketball_data)
## [1] "Year" "Tm" "Player"
## [4] "Age" "G" "MP"
## [7] "PER" "TS." "X3PAr"
## [10] "FTr" "ORB." "DRB."
## [13] "TRB." "AST." "STL."
## [16] "BLK." "TOV." "USG."
## [19] "." "OWS" "DWS"
## [22] "WS" "WS.48" "..1"
## [25] "OBPM" "DBPM" "BPM"
## [28] "VORP" "OWS.48" "DWS.48"
## [31] "Shot." "Player.ID" "Team.Mar"
## [34] "Team.MP" "Team.Gm" "Year.3PAr"
## [37] "Team.TS." "Tm.USG" "Tm.TS.W.O.Plyr"
## [40] "Reb.Vers" "Defense" "Val.Shot"
## [43] "Offense" "MPG...Int" "RAW.SPM"
## [46] "X.Min" "RAW.Contrib" "Tm.Sum"
## [49] "Tm.Adj" "BPM.1" "StdErr"
## [52] "Contrib" "VORP.1" "Reb.Vers.1"
## [55] "Val.Shot.1" "Offense.1" "Defense.1"
## [58] "MPG" "Raw.OBPM" "Contrib.1"
## [61] "Tm.Ortg" "Tm.Sum.1" "Tm.Adj.1"
## [64] "Tm.DRtg" "OBPM.1" "OStdErr"
## [67] "Ocontrib" "OVORP" "DBPM.1"
## [70] "DStdErr" "Dcontrib" "DVORP"
## [73] "Sum.SPM" "X.Min.1" "MPG.1"
## [76] "ReMPG" "BPM.2" "Contrib.2"
## [79] "VORP.2" "VORP.Gm" "O.BPM"
## [82] "Ocontrib.1" "OVORP.1" "OVORP.Gm"
## [85] "D.BPM" "Dcontrib.1" "DVORP.1"
## [88] "DVORP.Gm" "Production" "Prod.Gm"
## [91] "Exp.BPM" "TrueTalentBPM" "Exp.Min"
## [94] "TrueTalentVORP" "TrueSalary" "TrueTimeVORP"
## [97] "Adjusted.Production" "WORP" "O.WORP"
## [100] "D.WORP" "Adjusted.WORP" "Estimated.Position"
## [103] "BBRef.Pos" "Age.on.Feb.1" "Yrs.Experience"
## [106] "Height" "Weight" "Rounded.Position"
## [109] "Rounded.Age"
# Five-number summary (plus mean) of the Age column.
# NOTE(review): the recorded minimum is 0, which presumably marks missing
# ages rather than real values — confirm before using Age in a model.
summary(basketball_data[["Age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 23.00 26.00 26.09 29.00 44.00
# Clean the TrueSalary column: strip the dollar signs and thousands separators
# from the text, then convert what is left to numeric in a single step.
basketball_data[["TrueSalary"]] <- as.numeric(
  gsub("\\$|\\,", "", basketball_data[["TrueSalary"]])
)
# Confirm the column is now numeric.
str(basketball_data[["TrueSalary"]])
## num [1:17729] 8800000 14800000 14700000 20600000 18500000 11900000 19700000 18500000 17000000 10700000 ...
# Put the data frame's columns on the search path so they can be referenced
# by bare name. attach() works on a copy taken at this point, so later changes
# to basketball_data are not visible through the attached names.
# NOTE(review): nothing in the visible code needs the attached names — confirm
# whether later sections do before removing this call.
attach(basketball_data)
library(plotly)
## Warning: package 'plotly' was built under R version 3.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Interactive density curve of TrueSalary: build the ggplot2 line first, then
# convert it to a plotly widget. Rows with a non-finite TrueSalary are dropped
# by stat_density, which warns about them.
p <- ggplotly(
  ggplot(basketball_data, aes(x = TrueSalary)) +
    stat_density(geom = "line")
)
## Warning: We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 5439 rows containing non-finite values (stat_density).
# Auto-print the plotly object so the interactive plot is rendered.
p
You can also embed plots, for example:
## ORB. DRB. TRB. AST. STL. BLK.
## 1 8.0 25.8 17.0 6.8 0.9 1.1
## 2 3.6 6.4 5.0 22.2 2.4 0.2
## 3 10.2 22.7 16.4 6.5 1.3 2.4
## 4 8.0 25.6 17.0 15.7 1.4 1.1
## 5 7.8 24.7 16.4 13.4 1.4 2.0
## 6 7.9 16.0 11.8 9.9 1.7 0.6
## Warning: package 'corrplot' was built under R version 3.3.2
## ORB. DRB. TRB. AST. STL. BLK.
## ORB. 1.00000000 0.5044349 0.8187350 -0.3636877 -0.07104839 0.3354940
## DRB. 0.50443487 1.0000000 0.9038240 -0.2916031 -0.10230481 0.4255017
## TRB. 0.81873500 0.9038240 1.0000000 -0.3666336 -0.10254574 0.4395971
## AST. -0.36368766 -0.2916031 -0.3666336 1.0000000 0.32763746 -0.2823699
## STL. -0.07104839 -0.1023048 -0.1025457 0.3276375 1.00000000 -0.1123384
## BLK. 0.33549398 0.4255017 0.4395971 -0.2823699 -0.11233838 1.0000000
## Warning: package 'qtlcharts' was built under R version 3.3.3
## Set screen size to height=700 x width=1000