library(tidyr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------------- tidyverse 1.2.0 --
## v ggplot2 2.2.1     v purrr   0.2.3
## v tibble  1.3.4     v stringr 1.2.0
## v readr   1.1.1     v forcats 0.2.0
## Warning: package 'ggplot2' was built under R version 3.4.2
## Warning: package 'forcats' was built under R version 3.4.2
## -- Conflicts -------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::%+%()   masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()

Data Preparation

# Pull the 1994-2014 draft-pick table straight from GitHub, then keep a
# separate data.frame copy (`nba.players`) that the rest of the analysis edits.
nba_players <- read.csv(
  file = "https://raw.githubusercontent.com/mikegankhuyag/606-HW/master/NBA%20Draft%20Picks%201994-2014.csv"
)
nba.players <- data.frame(nba_players)

# Route through character so the conversion uses the printed values; entries
# that are not numbers become NA (R emits a coercion warning for them).
nba.players$Yrs <- as.numeric(as.character(nba.players$Yrs))
## Warning: NAs introduced by coercion
# Players whose Yrs could not be parsed are counted as 0 years.
nba.players$Yrs[is.na(nba.players$Yrs)] <- 0

# Lookup table from career length (0-23 years) to an experience label:
# two years per label up to 9 years, and "Legend" from 10 years onward.
Experience_grade <- data.frame(
  Years = 0:23,
  Experience_g = rep(
    c("Unsuccessful", "Little Experience", "Some Experience",
      "Experienced", "Very Experienced", "Legend"),
    times = c(2, 2, 2, 2, 2, 14)
  )
)

# Label each player by matching their Yrs against the lookup table.
nba.players$Experience <- with(
  Experience_grade,
  Experience_g[match(nba.players$Yrs, Years)]
)
head(nba.players[["Experience"]])
## [1] Some Experience   Little Experience Little Experience Some Experience  
## [5] Little Experience Some Experience  
## 6 Levels: Experienced Legend Little Experience ... Very Experienced
# Same clean-up as Yrs: convert via character so the printed values are
# parsed; non-numeric entries become NA.
nba.players$VORP <- as.numeric(as.character(nba.players$VORP))
## Warning: NAs introduced by coercion
# Unparseable VORP values are replaced with 0.
nba.players$VORP <- replace(nba.players$VORP, is.na(nba.players$VORP), 0)

Research question

You should phrase your research question in a way that matches up with the scope of inference your dataset allows for.

In the NBA, can draft position predict success? Success will be measured by VORP (Value Over Replacement Player). What are the odds of a second-round player having a successful career in the NBA?

Cases

What are the cases, and how many are there?

The cases are NBA players drafted into the league from 1994-2014. There are a total of 1236 cases.

Data collection

Describe the method of data collection. Data was collected from https://www.basketball-reference.com/draft/ and the list shows each player’s total career statistics.

Type of study

What type of study is this (observational/experiment)?

This is an observational study.

Data Source

If you collected the data, state self-collected. If not, provide a citation/link.

All data came from https://www.basketball-reference.com/draft/

Response

What is the response variable, and what type is it (numerical/categorical)?

The response variable is how successful each player’s career is. It is categorical, with the levels listed below. 1. Legend - 10 or more years 2. Very Experienced - 8-9 years 3. Experienced - 6-7 years 4. Some Experience - 4-5 years 5. Little Experience - 2-3 years 6. Unsuccessful - 1 year or less

Explanatory

What is the explanatory variable, and what type is it (numerical/categorical)?

The explanatory variable is number of years in the league and it is numerical.

Relevant summary statistics

Provide summary statistics relevant to your research question. For example, if you’re comparing means across groups provide means, SDs, sample sizes of each group. This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.

# Overall summary statistics for career length (Yrs).
psych::describe(nba.players[["Yrs"]])
##    vars    n mean   sd median trimmed  mad min max range skew kurtosis
## X1    1 1236 5.58 4.76      4    5.11 4.45   0  21    21 0.72    -0.35
##      se
## X1 0.14
# Overall summary statistics for VORP.
psych::describe(nba.players[["VORP"]])
##    vars    n mean    sd median trimmed  mad  min   max range skew kurtosis
## X1    1 1236 3.95 10.79      0    1.55 1.19 -8.3 116.6 124.9 4.24    25.18
##      se
## X1 0.31
# Player counts per experience label; an NA column is shown only if any
# player is unlabeled.
table(nba.players[["Experience"]], useNA = "ifany")
## 
##       Experienced            Legend Little Experience   Some Experience 
##               130               291               226               178 
##      Unsuccessful  Very Experienced 
##               299               112
# Career-length summary within each experience label, returned as a single
# matrix-style table (mat = TRUE) rather than a list of per-group tables.
psych::describeBy(
  nba.players[["Yrs"]],
  group = nba.players[["Experience"]],
  mat = TRUE
)
##     item            group1 vars   n       mean        sd median    trimmed
## X11    1       Experienced    1 130  6.4230769 0.4959586      6  6.4038462
## X12    2            Legend    1 291 12.6082474 2.4978199     12 12.2489270
## X13    3 Little Experience    1 226  2.4247788 0.4954066      2  2.4065934
## X14    4   Some Experience    1 178  4.3707865 0.4843779      4  4.3402778
## X15    5      Unsuccessful    1 299  0.3879599 0.4881023      0  0.3609959
## X16    6  Very Experienced    1 112  8.4642857 0.5009643      8  8.4555556
##        mad min max range      skew   kurtosis         se
## X11 0.0000   6   7     1 0.3078134 -1.9198419 0.04349845
## X12 2.9652  10  21    11 1.0244646  0.4998887 0.14642479
## X13 0.0000   2   3     1 0.3023310 -1.9170197 0.03295396
## X14 0.0000   4   5     1 0.5305261 -1.7281563 0.03630566
## X15 0.0000   0   1     1 0.4575493 -1.7966239 0.02822768
## X16 0.0000   8   9     1 0.1413091 -1.9976293 0.04733668
# VORP summary within each experience label, again as one combined table.
psych::describeBy(
  nba.players[["VORP"]],
  group = nba.players[["Experience"]],
  mat = TRUE
)
##     item            group1 vars   n       mean         sd median
## X11    1       Experienced    1 130  2.3538462  5.4015380   0.65
## X12    2            Legend    1 291 13.9927835 17.5389198   9.20
## X13    3 Little Experience    1 226 -0.3867257  0.8843835  -0.30
## X14    4   Some Experience    1 178  0.1494382  2.4126373  -0.40
## X15    5      Unsuccessful    1 299 -0.0638796  0.1620951   0.00
## X16    6  Very Experienced    1 112  5.2303571  8.5144587   2.50
##         trimmed      mad  min   max range      skew  kurtosis          se
## X11  1.37788462  3.18759 -4.1  25.4  29.5  1.843642  3.494965 0.473746224
## X12 10.98283262 10.97124 -7.1 116.6 123.7  2.215044  6.765234 1.028149685
## X13 -0.40329670  0.29652 -2.9   8.6  11.5  4.729200 47.258255 0.058828313
## X14 -0.19027778  1.03782 -4.1  12.6  16.7  2.215084  7.409229 0.180834813
## X15 -0.02655602  0.00000 -1.1   0.2   1.3 -3.843541 18.278836 0.009374203
## X16  3.93666667  5.63388 -8.3  42.3  50.6  1.916635  5.015057 0.804540725
# Histogram of career length.
# - The column is named bare inside aes() so ggplot2 resolves it from the
#   `data` argument; `nba.players$Yrs` bypasses that lookup and also ends up
#   as the axis label.
# - Yrs holds whole years (0-21 in this data), so one bin per year is used
#   instead of the default 30 bins, which leaves artificial empty bins
#   between integers.
ggplot(nba.players, aes(x = Yrs)) +
  geom_histogram(binwidth = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of VORP.
# - The column is named bare inside aes() so ggplot2 resolves it from the
#   `data` argument rather than from an external `nba.players$VORP` vector.
# - `bins = 30` spells out the value ggplot2 was already defaulting to, so
#   the binning is unchanged but the "Pick better value" message is gone.
ggplot(nba.players, aes(x = VORP)) +
  geom_histogram(bins = 30)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.