library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(ggplot2)
library(qtl)
Your team should not draft a running back in the first round.
In the NFL, passing is king. Rushing barely matters. Today's NFL is a pass-first league: according to PFF, rush efficiency explains only 4.4% of the variance in wins, while pass efficiency explains 62% of the variance. So why do teams still draft running backs in the first round?
Then there is the matter of their expensive rookie contracts. Running back is the 4th most expensive position, behind QB, WR and edge rusher, so it doesn't make sense to draft one in the first round.
Our Data is running back combine and NFL statistics data from 2015-2020. I got the data from NFL PFF (Pro Football Focus).
# Load the two running-back data sets from pinned GitHub commits.
df <- read.csv(
  file = "https://github.com/mianshariq/SPS/raw/dab24b98c3c1d48b96ea619c01caacfefa916386/Data%20606/Projects/NFL%20Data.csv"
)
df1 <- read.csv(
  file = "https://github.com/mianshariq/SPS/raw/4fe676d9723fea08abb22f2021d644177dc16698/Data%20606/Projects/NFL%20Data%20Min.csv"
)
Dependent Variable: Average Yards Per Game
Independent Variables: Combine Metrics, Draft Order
Exploratory Data Analysis: Descriptive Summary
# List every column of df with its type and leading values.
df %>% glimpse()
## Rows: 277
## Columns: 21
## $ Year <int> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020~
## $ Name <chr> "Salvon Ahmed", "Cam Akers", "Darius Anderson", "Lev~
## $ College <chr> "Washington", "Florida State", "Texas Christian", "W~
## $ POS <chr> "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB"~
## $ Height_in <dbl> 70.88, 70.38, 70.50, 68.88, 68.88, 70.63, 67.88, 70.~
## $ Weight_lbs <int> 197, 217, 208, 192, 207, 204, 188, 217, 247, 209, 21~
## $ Hand_Size_in <dbl> 8.75, 9.00, 9.63, 8.75, 8.63, 9.75, 9.25, 9.25, 9.63~
## $ Arm_Length_in <dbl> 29.25, 30.63, 30.50, 30.25, 31.25, 30.88, 30.00, 30.~
## $ X40_Yard_sec <dbl> 4.62, 4.47, 4.61, 4.50, 4.57, NA, 4.42, 4.58, 4.53, ~
## $ Bench_Press <int> NA, 20, 19, 16, 12, 13, 20, NA, 23, 23, NA, 15, 20, ~
## $ Vert_Leap_In <dbl> 34.5, 35.5, 36.0, 39.5, 39.0, NA, 37.5, 33.5, 41.0, ~
## $ Broad_Jump_in <int> 120, 122, 128, 125, 122, NA, 120, 119, 131, NA, 127,~
## $ Shuttle_Shuttle <dbl> NA, 4.42, 4.19, NA, 4.25, NA, NA, 4.32, NA, NA, NA, ~
## $ X3Cone <dbl> NA, NA, NA, NA, 6.97, NA, NA, 7.18, 7.19, NA, NA, NA~
## $ Team <chr> "#N/A", "Rams", "#N/A", "#N/A", "Cardinals", "#N/A",~
## $ Round <chr> "#N/A", "2", "#N/A", "#N/A", "7", "#N/A", "7", "4", ~
## $ Draft_Order <chr> "40", "52", "#N/A", "#N/A", "222", "#N/A", "245", "1~
## $ RushYard_Per_Game <chr> "53.17", "48.08", "#N/A", "2.2", "#N/A", "#N/A", "0"~
## $ GP <chr> "6", "13", "#N/A", "5", "#N/A", "#N/A", "4", "12", "~
## $ Total_Yrd_pg <chr> "63.33", "57.54", "#N/A", "3.2", "#N/A", "#N/A", "0"~
## $ TD <chr> "3", "2", "#N/A", "0", "#N/A", "#N/A", "0", "2", "2"~
# Per-column summary of df (quantiles for numeric columns, length/class for
# character columns).
df %>% summary()
## Year Name College POS
## Min. :2015 Length:277 Length:277 Length:277
## 1st Qu.:2016 Class :character Class :character Class :character
## Median :2017 Mode :character Mode :character Mode :character
## Mean :2017
## 3rd Qu.:2019
## Max. :2020
##
## Height_in Weight_lbs Hand_Size_in Arm_Length_in
## Min. :65.75 Min. :170.0 Min. : 8.250 Min. :27.38
## 1st Qu.:69.13 1st Qu.:203.0 1st Qu.: 8.880 1st Qu.:30.00
## Median :70.38 Median :213.0 Median : 9.250 Median :31.00
## Mean :70.43 Mean :212.1 Mean : 9.237 Mean :30.88
## 3rd Qu.:71.75 3rd Qu.:222.0 3rd Qu.: 9.500 3rd Qu.:31.63
## Max. :75.00 Max. :259.0 Max. :10.500 Max. :33.75
## NA's :1 NA's :1
## X40_Yard_sec Bench_Press Vert_Leap_In Broad_Jump_in
## Min. :4.280 Min. : 5.00 Min. :27.00 Min. :106.0
## 1st Qu.:4.490 1st Qu.:16.00 1st Qu.:32.00 1st Qu.:116.0
## Median :4.560 Median :19.00 Median :34.50 Median :120.0
## Mean :4.558 Mean :18.79 Mean :34.55 Mean :119.7
## 3rd Qu.:4.630 3rd Qu.:22.00 3rd Qu.:36.50 3rd Qu.:123.0
## Max. :4.850 Max. :34.00 Max. :42.50 Max. :135.0
## NA's :14 NA's :28 NA's :16 NA's :25
## Shuttle_Shuttle X3Cone Team Round
## Min. :3.900 Min. :6.570 Length:277 Length:277
## 1st Qu.:4.225 1st Qu.:6.980 Class :character Class :character
## Median :4.320 Median :7.110 Mode :character Mode :character
## Mean :4.326 Mean :7.101
## 3rd Qu.:4.420 3rd Qu.:7.220
## Max. :4.630 Max. :7.680
## NA's :46 NA's :58
## Draft_Order RushYard_Per_Game GP Total_Yrd_pg
## Length:277 Length:277 Length:277 Length:277
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## TD
## Length:277
## Class :character
## Mode :character
##
##
##
##
# Per-column summary of df1, the second data set.
df1 %>% summary()
## Year Name College POS
## Min. :2015 Length:277 Length:277 Length:277
## 1st Qu.:2016 Class :character Class :character Class :character
## Median :2017 Mode :character Mode :character Mode :character
## Mean :2017
## 3rd Qu.:2019
## Max. :2020
##
## Height_in Weight_lbs Hand_Size_in Arm_Length_in
## Min. :65.75 Min. :170.0 Min. : 8.250 Min. :27.38
## 1st Qu.:69.13 1st Qu.:203.0 1st Qu.: 8.880 1st Qu.:30.00
## Median :70.38 Median :213.0 Median : 9.250 Median :31.00
## Mean :70.43 Mean :212.1 Mean : 9.237 Mean :30.88
## 3rd Qu.:71.75 3rd Qu.:222.0 3rd Qu.: 9.500 3rd Qu.:31.63
## Max. :75.00 Max. :259.0 Max. :10.500 Max. :33.75
## NA's :1 NA's :1
## X40_Yard_sec Bench_Press Vert_Leap_In Broad_Jump_in
## Min. :4.280 Min. : 5.00 Min. :27.00 Min. :106.0
## 1st Qu.:4.490 1st Qu.:16.00 1st Qu.:32.00 1st Qu.:116.0
## Median :4.560 Median :19.00 Median :34.50 Median :120.0
## Mean :4.558 Mean :18.79 Mean :34.55 Mean :119.7
## 3rd Qu.:4.630 3rd Qu.:22.00 3rd Qu.:36.50 3rd Qu.:123.0
## Max. :4.850 Max. :34.00 Max. :42.50 Max. :135.0
## NA's :14 NA's :28 NA's :16 NA's :25
## Shuttle_Shuttle X3Cone Team Round
## Min. :3.900 Min. :6.570 Length:277 Length:277
## 1st Qu.:4.225 1st Qu.:6.980 Class :character Class :character
## Median :4.320 Median :7.110 Mode :character Mode :character
## Mean :4.326 Mean :7.101
## 3rd Qu.:4.420 3rd Qu.:7.220
## Max. :4.630 Max. :7.680
## NA's :46 NA's :58
## Draft_Order Rush_Attempt_pg GP Total_Yrd_pg
## Length:277 Length:277 Length:277 Length:277
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## TD Rush_PA Rush_Yds_Gm Total_Yds
## Length:277 Length:277 Length:277 Length:277
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
# These columns are read as character because missing entries are written as
# "#N/A"; as.numeric() converts those entries to NA (with a coercion warning).
cols.num <- c(
  "Draft_Order", "RushYard_Per_Game", "GP", "TD", "Total_Yrd_pg"
)
df[cols.num] <- lapply(df[cols.num], as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
# Confirm the column classes of df after the numeric coercion.
sapply(df, class)

# Treat the draft year as a categorical variable. lapply() is used on purpose:
# sapply() simplifies its result into a matrix, which strips the factor class
# and would leave Year as a character column instead of a factor.
cols.num <- c("Year")
df[cols.num] <- lapply(df[cols.num], as.factor)
sapply(df, class)

# Coerce the character-typed stat columns of df1 to numeric; non-numeric
# entries become NA (with a coercion warning per column).
cols.num <- c(
  "Draft_Order", "Rush_Attempt_pg", "GP", "TD",
  "Rush_Yds_Gm", "Total_Yrd_pg", "Rush_PA", "Total_Yds"
)
df1[cols.num] <- lapply(df1[cols.num], as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
# Check the column classes of df1 after coercion.
sapply(X = df1, FUN = class)

# Scatter plot of total yards per game against draft position, overlaid with
# a least-squares trend line.
chart <- ggplot(data = df, mapping = aes(x = Draft_Order, y = Total_Yrd_pg)) +
  geom_point(color = "#69b3a2") +
  geom_smooth(method = "lm")
chart
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 177 rows containing non-finite values (stat_smooth).
## Warning: Removed 177 rows containing missing values (geom_point).
# Scatter plot of player weight against 40-yard dash time, overlaid with a
# least-squares trend line.
chart <- ggplot(data = df, mapping = aes(x = X40_Yard_sec, y = Weight_lbs)) +
  geom_point(color = "#69b3a2") +
  geom_smooth(method = "lm")
chart
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 14 rows containing missing values (geom_point).
# Scatter plot of 40-yard dash time against draft position, overlaid with a
# least-squares trend line.
chart <- ggplot(data = df, mapping = aes(x = Draft_Order, y = X40_Yard_sec)) +
  geom_point(color = "#69b3a2") +
  geom_smooth(method = "lm")
chart
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 162 rows containing non-finite values (stat_smooth).
## Warning: Removed 162 rows containing missing values (geom_point).
# Distribution of total yards per game within each draft round.
# No geom_smooth() layer is added: Round is a discrete (character) axis, so
# each group has a single x value, a linear smoother has nothing to fit, and
# the layer would draw nothing while still emitting warnings.
chart <- ggplot(data = df, mapping = aes(x = Round, y = Total_Yrd_pg)) +
  geom_boxplot(color = "#69b3a2")
chart
## Warning: Removed 130 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 130 rows containing non-finite values (stat_smooth).
It can be assumed that the original data was gathered with proper randomization methods.
Experimental Design How will the experiment be organized and conducted to test the hypothesis?
The experiment conducted in this analysis determines whether the variation in average yards per game can be attributed to the round in which a player was drafted. An analysis of variance at a 95% confidence level will be performed to determine whether being drafted in Round 1 rather than Round 2 has an effect on a running back's NFL performance.
The null hypothesis for this ANOVA: the mean average yards per game of first-round running backs is equal to that of second-round running backs.
The alternative hypothesis: the mean average yards per game of first-round running backs is different from that of second-round running backs.
What is the rationale for this design?
The data frame in this study contains a single factor (draft round) with multiple levels. Therefore, an ANOVA is the appropriate test to perform.
# One-way ANOVA of total yards per game across draft rounds.
model1 <- aov(formula = df$Total_Yrd_pg ~ df$Round)
summary(model1)
## Df Sum Sq Mean Sq F value Pr(>F)
## df$Round 7 75704 10815 22.26 <2e-16 ***
## Residuals 139 67521 486
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 130 observations deleted due to missingness
# Tukey HSD pairwise comparisons between draft rounds at the 95% family-wise
# confidence level.
TukeyHSD(x = model1, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df$Total_Yrd_pg ~ df$Round)
##
## $`df$Round`
## diff lwr upr p adj
## 1-#N/A 72.302572 48.72367 95.881476 0.0000000
## 2-#N/A 54.672192 33.46404 75.880346 0.0000000
## 3-#N/A 33.974946 15.22736 52.722531 0.0000034
## 4-#N/A 15.450038 -2.60295 33.503026 0.1528320
## 5-#N/A 12.925244 -6.21916 32.069648 0.4343749
## 6-#N/A -4.572765 -27.24735 18.101817 0.9985468
## 7-#N/A 2.244055 -21.33485 25.822960 0.9999905
## 2-1 -17.630379 -46.16174 10.900982 0.5520819
## 3-1 -38.327626 -65.08066 -11.574594 0.0005352
## 4-1 -56.852533 -83.12349 -30.581577 0.0000000
## 5-1 -59.377327 -86.40992 -32.344737 0.0000000
## 6-1 -76.875336 -106.51297 -47.237701 0.0000000
## 7-1 -70.058517 -100.39360 -39.723430 0.0000000
## 3-2 -20.697246 -45.38623 3.991735 0.1721977
## 4-2 -39.222154 -63.38792 -15.056389 0.0000468
## 5-2 -41.746948 -66.73859 -16.755309 0.0000247
## 6-2 -59.244957 -87.03363 -31.456281 0.0000000
## 7-2 -52.428137 -80.95950 -23.896776 0.0000023
## 4-3 -18.524907 -40.56284 3.513022 0.1695852
## 5-3 -21.049702 -43.99024 1.890834 0.0974442
## 6-3 -38.547710 -64.50723 -12.588191 0.0002805
## 7-3 -31.730891 -58.48392 -4.977859 0.0086282
## 5-4 -2.524794 -24.90127 19.851680 0.9999696
## 6-4 -20.022803 -45.48523 5.439621 0.2400044
## 7-4 -13.205983 -39.47694 13.064973 0.7803895
## 6-5 -17.498009 -43.74554 8.749522 0.4513996
## 7-5 -10.681189 -37.71378 16.351402 0.9259404
## 7-6 6.816820 -22.82082 36.454455 0.9966517
As the adjusted p-value for the difference between Round 1 and Round 2 (0.55) is not below .05, we fail to reject the null hypothesis. There is no statistically significant difference between running backs drafted in Round 1 and Round 2.
# Plot the Tukey confidence intervals; las = 2 turns the axis labels
# perpendicular to the axis so the pair names stay readable.
tukey_model1 <- TukeyHSD(model1, conf.level = 0.95)
plot(tukey_model1, las = 2)
The conclusion, therefore, is that your team should not draft a running back in the first round.
Quantile-quantile (Q-Q) plots are graphs used to verify the distributional assumption for a set of data. ANOVA assumes that the residuals are normally distributed; if that assumption holds, the points on a normal Q-Q plot of the residuals fall approximately along a straight line.
The Normal Q-Q plots for both anova models produced relatively linear relationships between the residual and theoretical values, indicating that the use of ANOVA in this model was appropriate.
# Normal Q-Q plot of the model1 residuals, with a reference line, to check the
# ANOVA normality assumption. The title names the response actually modelled
# (total yards per game); the model does not involve player weight.
qqnorm(
  residuals(model1),
  main = "Normal Q-Q Plot for Total Yards per Game ANOVA Residuals",
  ylab = "Total_Yard_pg Residuals"
)
qqline(residuals(model1))
A residuals vs. fits plot is a common graph used in residual analysis. It is a scatter plot of residuals as a function of fitted values, or the estimated responses. These plots are used to detect non-linearity, outliers, and unequal error variances. Here we do not see any linear pattern.
# Residuals vs. fitted values for model1.
plot(
  x = fitted(model1),
  y = residuals(model1),
  main = "Total_Yards_pg"
)
First round RBs are not any better at rushing than other round Running Backs
# One-way ANOVA of Rush_PA across draft rounds.
# Both the response and the factor are taken from df1 so the rows are
# guaranteed to line up; pairing df1$Rush_PA with df$Round would silently
# depend on the two data frames sharing the exact same row order.
model2 <- aov(Rush_PA ~ Round, data = df1)
summary(model2)
## Df Sum Sq Mean Sq F value Pr(>F)
## df$Round 6 1.086 0.1810 1.083 0.399
## Residuals 25 4.179 0.1672
## 245 observations deleted due to missingness
# Tukey HSD pairwise comparisons for model2 at the 95% family-wise
# confidence level.
TukeyHSD(x = model2, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df1$Rush_PA ~ df$Round)
##
## $`df$Round`
## diff lwr upr p adj
## 1-#N/A -0.27196296 -1.1442832 0.6003572 0.9497984
## 2-#N/A 0.13142857 -0.7715086 1.0343657 0.9990790
## 3-#N/A -0.20275000 -1.0885953 0.6830953 0.9890697
## 4-#N/A -0.31000000 -1.8209029 1.2009029 0.9938638
## 5-#N/A 0.10416667 -0.9642030 1.1725363 0.9999087
## 7-#N/A -0.51000000 -2.0209029 1.0009029 0.9281641
## 2-1 0.40339153 -0.2560205 1.0628036 0.4639606
## 3-1 0.06921296 -0.5665942 0.7050201 0.9998264
## 4-1 -0.03803704 -1.4172964 1.3412223 0.9999999
## 5-1 0.37612963 -0.4961906 1.2484498 0.8070293
## 7-1 -0.23803704 -1.6172964 1.1412223 0.9976087
## 3-2 -0.33417857 -1.0113815 0.3430243 0.6959847
## 4-2 -0.44142857 -1.8402528 0.9573957 0.9469227
## 5-2 -0.02726190 -0.9301991 0.8756753 0.9999999
## 7-2 -0.64142857 -2.0402528 0.7573957 0.7604841
## 4-3 -0.10725000 -1.4951029 1.2806029 0.9999767
## 5-3 0.30691667 -0.5789287 1.1927620 0.9194878
## 7-3 -0.30725000 -1.6951029 1.0806029 0.9908157
## 5-4 0.41416667 -1.0967362 1.9250696 0.9727851
## 7-4 -0.20000000 -2.0504706 1.6504706 0.9998335
## 7-5 -0.61416667 -2.1250696 0.8967362 0.8451534
# Plot the Tukey confidence intervals for model2 with perpendicular axis
# labels (las = 2).
tukey_model2 <- TukeyHSD(model2, conf.level = 0.95)
plot(tukey_model2, las = 2)
The multiple regression analysis will be performed in four parts:
Data assessment, modeling, assumption testing, and validating the model on test data.
There are no outliers that need to be removed based on the summary statistics.
Interestingly, none of the variables has a p-value less than .05 except Draft Order, which means that none of the combine attributes can be used as regressors for running back performance; only Draft Order can.
# Multiple regression of total yards per game on all combine metrics plus
# draft position.
reg_model <- lm(
  formula = Total_Yrd_pg ~ Height_in + Weight_lbs + Hand_Size_in +
    Arm_Length_in + X40_Yard_sec + Bench_Press + Vert_Leap_In +
    Broad_Jump_in + Shuttle_Shuttle + X3Cone + Draft_Order,
  data = df
)
summary(reg_model)
##
## Call:
## lm(formula = Total_Yrd_pg ~ Height_in + Weight_lbs + Hand_Size_in +
## Arm_Length_in + X40_Yard_sec + Bench_Press + Vert_Leap_In +
## Broad_Jump_in + Shuttle_Shuttle + X3Cone + Draft_Order, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.394 -13.357 -2.109 17.578 65.163
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.07811 314.42474 0.143 0.887
## Height_in -2.08094 2.75234 -0.756 0.453
## Weight_lbs -0.12309 0.40684 -0.303 0.763
## Hand_Size_in 3.37050 6.77015 0.498 0.621
## Arm_Length_in 7.10201 4.25516 1.669 0.101
## X40_Yard_sec 8.80773 41.67023 0.211 0.833
## Bench_Press -0.36665 0.77023 -0.476 0.636
## Vert_Leap_In 1.34889 1.35809 0.993 0.325
## Broad_Jump_in -0.64200 0.90781 -0.707 0.482
## Shuttle_Shuttle 17.43804 25.72572 0.678 0.501
## X3Cone -16.32662 21.07976 -0.775 0.442
## Draft_Order -0.33719 0.04752 -7.096 2.41e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.35 on 56 degrees of freedom
## (209 observations deleted due to missingness)
## Multiple R-squared: 0.5398, Adjusted R-squared: 0.4494
## F-statistic: 5.971 on 11 and 56 DF, p-value: 2.464e-06
Since Draft Order is the only significant regressor, a simple linear regression can be used.
The R^2 of the simple model is 0.488, meaning that 48.8% of the variability in total yards per game can be explained by draft order.
# Simple linear regression of total yards per game on draft position only.
reg_model <- lm(
  formula = Total_Yrd_pg ~ Draft_Order,
  data = df
)
summary(reg_model)
##
## Call:
## lm(formula = Total_Yrd_pg ~ Draft_Order, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.413 -13.096 -3.814 16.403 56.902
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 82.78447 4.67061 17.73 < 2e-16 ***
## Draft_Order -0.32989 0.03411 -9.67 6.3e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.72 on 98 degrees of freedom
## (177 observations deleted due to missingness)
## Multiple R-squared: 0.4883, Adjusted R-squared: 0.4831
## F-statistic: 93.51 on 1 and 98 DF, p-value: 6.299e-16
Let's look at it from a different perspective now. The combine variables could not predict how a player will do in the NFL, but can combine performance predict draft order?
# Regress draft position on the full set of combine metrics.
reg_model <- lm(
  formula = Draft_Order ~ Height_in + Weight_lbs + Hand_Size_in +
    Arm_Length_in + X40_Yard_sec + Bench_Press + Vert_Leap_In +
    Broad_Jump_in + Shuttle_Shuttle + X3Cone,
  data = df
)
summary(reg_model)
##
## Call:
## lm(formula = Draft_Order ~ Height_in + Weight_lbs + Hand_Size_in +
## Arm_Length_in + X40_Yard_sec + Bench_Press + Vert_Leap_In +
## Broad_Jump_in + Shuttle_Shuttle + X3Cone, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -127.180 -45.053 3.441 47.136 132.178
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1107.9913 778.9498 -1.422 0.1595
## Height_in 7.0178 6.8416 1.026 0.3086
## Weight_lbs -2.0272 1.0318 -1.965 0.0535 .
## Hand_Size_in -14.6222 16.7112 -0.875 0.3847
## Arm_Length_in -1.6183 10.6710 -0.152 0.8799
## X40_Yard_sec 144.7197 99.6076 1.453 0.1509
## Bench_Press 0.7464 1.9326 0.386 0.7005
## Vert_Leap_In 2.5891 3.4570 0.749 0.4565
## Broad_Jump_in -0.1417 2.3682 -0.060 0.9525
## Shuttle_Shuttle 70.5447 63.8162 1.105 0.2729
## X3Cone 45.5625 50.2495 0.907 0.3678
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 64.86 on 68 degrees of freedom
## (198 observations deleted due to missingness)
## Multiple R-squared: 0.1443, Adjusted R-squared: 0.01842
## F-statistic: 1.146 on 10 and 68 DF, p-value: 0.3421
Here we have the same result: none of the regressors has a p-value below .05, which is the reason for such a low R^2.
# Reduced model: draft position regressed on a subset of the combine metrics,
# fitted on df1.
BackWard_Elimination_model <- lm(
  formula = Draft_Order ~ Height_in + Weight_lbs + X40_Yard_sec +
    Shuttle_Shuttle + X3Cone,
  data = df1
)
summary(BackWard_Elimination_model)
##
## Call:
## lm(formula = Draft_Order ~ Height_in + Weight_lbs + X40_Yard_sec +
## Shuttle_Shuttle + X3Cone, data = df1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -122.907 -49.984 0.579 45.251 125.332
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -415.6297 609.0462 -0.682 0.4969
## Height_in 1.9823 6.0046 0.330 0.7422
## Weight_lbs -1.4921 0.8356 -1.786 0.0779 .
## X40_Yard_sec 52.6023 86.2382 0.610 0.5436
## Shuttle_Shuttle 41.0685 59.7683 0.687 0.4940
## X3Cone 45.3174 45.5682 0.994 0.3230
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 63.84 on 80 degrees of freedom
## (191 observations deleted due to missingness)
## Multiple R-squared: 0.08201, Adjusted R-squared: 0.02464
## F-statistic: 1.429 on 5 and 80 DF, p-value: 0.2227
We can see here that linear regression on combine metrics is not a good model for predicting a running back's NFL performance. There are other variables that are not in our data, such as college performance, which have a huge impact on draft order.
I believe an addition of College Performance along with a decision tree model can be a better predictor of draft order and therefore NFL performance.
Every year, teams fall in love with RB prospects because of their freakish athletic abilities. But what they keep forgetting when draft season comes around is that rushing stops being important at the NFL level. Today's NFL is a pass-friendly league, and teams need to spend their draft resources on other highly valued players such as QBs, WRs, CBs or edge rushers. Don't draft a running back in the first round.