library(tidyr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------------- tidyverse 1.2.0 --
## v ggplot2 2.2.1 v purrr 0.2.3
## v tibble 1.3.4 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## Warning: package 'ggplot2' was built under R version 3.4.2
## Warning: package 'forcats' was built under R version 3.4.2
## -- Conflicts -------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the NBA draft-pick CSV (1994-2014 per its file name) from GitHub and
# keep a working copy under the name used by the rest of the analysis.
nba_players <- read.csv(
  "https://raw.githubusercontent.com/mikegankhuyag/606-HW/master/NBA%20Draft%20Picks%201994-2014.csv"
)
nba.players <- data.frame(nba_players)
# Convert years played to numeric. Passing through character first means a
# factor column is converted by its labels rather than its integer codes;
# entries that are not numbers become NA (R emits a coercion warning).
nba.players$Yrs <- as.numeric(as.character(nba.players$Yrs))
## Warning: NAs introduced by coercion
# Lookup table from years played (0-23) to an experience label.
# Labels advance every two years through year 9; years 10-23 are "Legend".
Experience_grade <- data.frame(
  Years = 0:23,
  Experience_g = rep(
    c(
      "Unsuccessful", "Little Experience", "Some Experience",
      "Experienced", "Very Experienced", "Legend"
    ),
    times = c(2, 2, 2, 2, 2, 14)
  )
)
# Missing year counts are set to 0 so every row can be matched below.
nba.players$Yrs <- replace(nba.players$Yrs, is.na(nba.players$Yrs), 0)
# Label each row by finding its years-played value in the lookup table.
nba.players$Experience <- with(
  Experience_grade,
  Experience_g[match(nba.players$Yrs, Years)]
)
# Preview the first few assigned labels.
head(nba.players[["Experience"]])
## [1] Some Experience Little Experience Little Experience Some Experience
## [5] Little Experience Some Experience
## 6 Levels: Experienced Legend Little Experience ... Very Experienced
# Convert VORP to numeric via character (so a factor column is converted by
# its labels, not its level codes); non-numeric entries become NA.
nba.players$VORP <- as.numeric(as.character(nba.players$VORP))
## Warning: NAs introduced by coercion
# Missing VORP values are then set to 0.
nba.players$VORP <- replace(nba.players$VORP, is.na(nba.players$VORP), 0)
You should phrase your research question in a way that matches up with the scope of inference your dataset allows for.
In the NBA, can draft position predict success? Success will be measured by VORP (Value Over Replacement Player). What are the odds of a second-round pick having a successful career in the NBA?
What are the cases, and how many are there?
The cases are NBA players drafted into the league from 1994 to 2014. There are a total of 1223 cases.
Describe the method of data collection. Data was collected from https://www.basketball-reference.com/draft/ and the list shows each player’s total career statistics.
What type of study is this (observational/experiment)?
This is an observational study.
If you collected the data, state self-collected. If not, provide a citation/link.
All data came from https://www.basketball-reference.com/draft/
What is the response variable, and what type is it (numerical/categorical)?
How successful the players’ careers are. The variable is categorical, with the levels listed below. 1. Legend - 10 or more years 2. Very Experienced - 8-9 years 3. Experienced - 6-7 years 4. Some Experience - 4-5 years 5. Little Experience - 2-3 years 6. Unsuccessful - 1 year or less
What is the explanatory variable, and what type is it (numerical/categorical)?
The explanatory variable is the number of years in the league, and it is numerical.
Provide summary statistics relevant to your research question. For example, if you’re comparing means across groups provide means, SDs, sample sizes of each group. This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.
# Overall descriptive statistics for years played.
psych::describe(nba.players[["Yrs"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 1236 5.58 4.76 4 5.11 4.45 0 21 21 0.72 -0.35
## se
## X1 0.14
# Overall descriptive statistics for VORP.
psych::describe(nba.players[["VORP"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 1236 3.95 10.79 0 1.55 1.19 -8.3 116.6 124.9 4.24 25.18
## se
## X1 0.31
# Number of rows in each experience category; an NA count is shown only
# if any label is missing.
table(nba.players[["Experience"]], useNA = "ifany")
##
## Experienced Legend Little Experience Some Experience
## 130 291 226 178
## Unsuccessful Very Experienced
## 299 112
# Descriptive statistics for years played within each experience category,
# returned as one table (mat = TRUE) instead of a list per group.
psych::describeBy(
  nba.players[["Yrs"]],
  group = nba.players[["Experience"]],
  mat = TRUE
)
## item group1 vars n mean sd median trimmed
## X11 1 Experienced 1 130 6.4230769 0.4959586 6 6.4038462
## X12 2 Legend 1 291 12.6082474 2.4978199 12 12.2489270
## X13 3 Little Experience 1 226 2.4247788 0.4954066 2 2.4065934
## X14 4 Some Experience 1 178 4.3707865 0.4843779 4 4.3402778
## X15 5 Unsuccessful 1 299 0.3879599 0.4881023 0 0.3609959
## X16 6 Very Experienced 1 112 8.4642857 0.5009643 8 8.4555556
## mad min max range skew kurtosis se
## X11 0.0000 6 7 1 0.3078134 -1.9198419 0.04349845
## X12 2.9652 10 21 11 1.0244646 0.4998887 0.14642479
## X13 0.0000 2 3 1 0.3023310 -1.9170197 0.03295396
## X14 0.0000 4 5 1 0.5305261 -1.7281563 0.03630566
## X15 0.0000 0 1 1 0.4575493 -1.7966239 0.02822768
## X16 0.0000 8 9 1 0.1413091 -1.9976293 0.04733668
# Descriptive statistics for VORP within each experience category,
# returned as one table (mat = TRUE) instead of a list per group.
psych::describeBy(
  nba.players[["VORP"]],
  group = nba.players[["Experience"]],
  mat = TRUE
)
## item group1 vars n mean sd median
## X11 1 Experienced 1 130 2.3538462 5.4015380 0.65
## X12 2 Legend 1 291 13.9927835 17.5389198 9.20
## X13 3 Little Experience 1 226 -0.3867257 0.8843835 -0.30
## X14 4 Some Experience 1 178 0.1494382 2.4126373 -0.40
## X15 5 Unsuccessful 1 299 -0.0638796 0.1620951 0.00
## X16 6 Very Experienced 1 112 5.2303571 8.5144587 2.50
## trimmed mad min max range skew kurtosis se
## X11 1.37788462 3.18759 -4.1 25.4 29.5 1.843642 3.494965 0.473746224
## X12 10.98283262 10.97124 -7.1 116.6 123.7 2.215044 6.765234 1.028149685
## X13 -0.40329670 0.29652 -2.9 8.6 11.5 4.729200 47.258255 0.058828313
## X14 -0.19027778 1.03782 -4.1 12.6 16.7 2.215084 7.409229 0.180834813
## X15 -0.02655602 0.00000 -1.1 0.2 1.3 -3.843541 18.278836 0.009374203
## X16 3.93666667 5.63388 -8.3 42.3 50.6 1.916635 5.015057 0.804540725
# Histogram of years played. The column is mapped by its bare name so that
# ggplot2 looks it up in the `data` argument (nba.players); writing
# `nba.players$Yrs` inside aes() bypasses `data`, labels the axis with the
# full expression, and is discouraged by ggplot2.
ggplot(nba.players, aes(x = Yrs)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of VORP. The column is mapped by its bare name so that ggplot2
# looks it up in the `data` argument (nba.players); writing
# `nba.players$VORP` inside aes() bypasses `data`, labels the axis with the
# full expression, and is discouraged by ggplot2.
ggplot(nba.players, aes(x = VORP)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.