library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(ggplot2)
library(ggplot2)
library(qtl)

Indroduction

Abstract

Your team should not draft a Running Back in First Round.

In the NFL, passing is king. Rushing barely matters. Todays NFL is a pass first league, according to PFF, Rush Efficiency explains only 4.4% of the variance in winfs while Pass efficiency explains 62% of the variance in NFL, so Why do teams still draft Running Backs in the first round.

Then their is the part about their expensive rookie contracts. Running Back are the 4th most expensive position behind, QR, WR and Edge Rushers so it doesnt make sense to draft one in the first round.

Data

Our Data is running back combine and NFL statistics data from 2015-2020. I got the data from NFL PFF (Pro Football Focus).

df=read.csv("https://github.com/mianshariq/SPS/raw/dab24b98c3c1d48b96ea619c01caacfefa916386/Data%20606/Projects/NFL%20Data.csv")

df1=read.csv("https://github.com/mianshariq/SPS/raw/4fe676d9723fea08abb22f2021d644177dc16698/Data%20606/Projects/NFL%20Data%20Min.csv")

Variables

Dependent Variable: Average Yards Per Game

Independent Variables: Combine Metrics, Draft Order

Statistical Analysis

Exploratory Data Analysis: Descriptive Summary

glimpse(df)

## Rows: 277
## Columns: 21
## $ Year              <int> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020~
## $ Name              <chr> "Salvon Ahmed", "Cam Akers", "Darius Anderson", "Lev~
## $ College           <chr> "Washington", "Florida State", "Texas Christian", "W~
## $ POS               <chr> "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB"~
## $ Height_in         <dbl> 70.88, 70.38, 70.50, 68.88, 68.88, 70.63, 67.88, 70.~
## $ Weight_lbs        <int> 197, 217, 208, 192, 207, 204, 188, 217, 247, 209, 21~
## $ Hand_Size_in      <dbl> 8.75, 9.00, 9.63, 8.75, 8.63, 9.75, 9.25, 9.25, 9.63~
## $ Arm_Length_in     <dbl> 29.25, 30.63, 30.50, 30.25, 31.25, 30.88, 30.00, 30.~
## $ X40_Yard_sec      <dbl> 4.62, 4.47, 4.61, 4.50, 4.57, NA, 4.42, 4.58, 4.53, ~
## $ Bench_Press       <int> NA, 20, 19, 16, 12, 13, 20, NA, 23, 23, NA, 15, 20, ~
## $ Vert_Leap_In      <dbl> 34.5, 35.5, 36.0, 39.5, 39.0, NA, 37.5, 33.5, 41.0, ~
## $ Broad_Jump_in     <int> 120, 122, 128, 125, 122, NA, 120, 119, 131, NA, 127,~
## $ Shuttle_Shuttle   <dbl> NA, 4.42, 4.19, NA, 4.25, NA, NA, 4.32, NA, NA, NA, ~
## $ X3Cone            <dbl> NA, NA, NA, NA, 6.97, NA, NA, 7.18, 7.19, NA, NA, NA~
## $ Team              <chr> "#N/A", "Rams", "#N/A", "#N/A", "Cardinals", "#N/A",~
## $ Round             <chr> "#N/A", "2", "#N/A", "#N/A", "7", "#N/A", "7", "4", ~
## $ Draft_Order       <chr> "40", "52", "#N/A", "#N/A", "222", "#N/A", "245", "1~
## $ RushYard_Per_Game <chr> "53.17", "48.08", "#N/A", "2.2", "#N/A", "#N/A", "0"~
## $ GP                <chr> "6", "13", "#N/A", "5", "#N/A", "#N/A", "4", "12", "~
## $ Total_Yrd_pg      <chr> "63.33", "57.54", "#N/A", "3.2", "#N/A", "#N/A", "0"~
## $ TD                <chr> "3", "2", "#N/A", "0", "#N/A", "#N/A", "0", "2", "2"~

summary(df)

##       Year          Name             College              POS           
##  Min.   :2015   Length:277         Length:277         Length:277        
##  1st Qu.:2016   Class :character   Class :character   Class :character  
##  Median :2017   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2017                                                           
##  3rd Qu.:2019                                                           
##  Max.   :2020                                                           
##                                                                         
##    Height_in       Weight_lbs     Hand_Size_in    Arm_Length_in  
##  Min.   :65.75   Min.   :170.0   Min.   : 8.250   Min.   :27.38  
##  1st Qu.:69.13   1st Qu.:203.0   1st Qu.: 8.880   1st Qu.:30.00  
##  Median :70.38   Median :213.0   Median : 9.250   Median :31.00  
##  Mean   :70.43   Mean   :212.1   Mean   : 9.237   Mean   :30.88  
##  3rd Qu.:71.75   3rd Qu.:222.0   3rd Qu.: 9.500   3rd Qu.:31.63  
##  Max.   :75.00   Max.   :259.0   Max.   :10.500   Max.   :33.75  
##                                  NA's   :1        NA's   :1      
##   X40_Yard_sec    Bench_Press     Vert_Leap_In   Broad_Jump_in  
##  Min.   :4.280   Min.   : 5.00   Min.   :27.00   Min.   :106.0  
##  1st Qu.:4.490   1st Qu.:16.00   1st Qu.:32.00   1st Qu.:116.0  
##  Median :4.560   Median :19.00   Median :34.50   Median :120.0  
##  Mean   :4.558   Mean   :18.79   Mean   :34.55   Mean   :119.7  
##  3rd Qu.:4.630   3rd Qu.:22.00   3rd Qu.:36.50   3rd Qu.:123.0  
##  Max.   :4.850   Max.   :34.00   Max.   :42.50   Max.   :135.0  
##  NA's   :14      NA's   :28      NA's   :16      NA's   :25     
##  Shuttle_Shuttle     X3Cone          Team              Round          
##  Min.   :3.900   Min.   :6.570   Length:277         Length:277        
##  1st Qu.:4.225   1st Qu.:6.980   Class :character   Class :character  
##  Median :4.320   Median :7.110   Mode  :character   Mode  :character  
##  Mean   :4.326   Mean   :7.101                                        
##  3rd Qu.:4.420   3rd Qu.:7.220                                        
##  Max.   :4.630   Max.   :7.680                                        
##  NA's   :46      NA's   :58                                           
##  Draft_Order        RushYard_Per_Game       GP            Total_Yrd_pg      
##  Length:277         Length:277         Length:277         Length:277        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       TD           
##  Length:277        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
##

summary(df1)

##       Year          Name             College              POS           
##  Min.   :2015   Length:277         Length:277         Length:277        
##  1st Qu.:2016   Class :character   Class :character   Class :character  
##  Median :2017   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2017                                                           
##  3rd Qu.:2019                                                           
##  Max.   :2020                                                           
##                                                                         
##    Height_in       Weight_lbs     Hand_Size_in    Arm_Length_in  
##  Min.   :65.75   Min.   :170.0   Min.   : 8.250   Min.   :27.38  
##  1st Qu.:69.13   1st Qu.:203.0   1st Qu.: 8.880   1st Qu.:30.00  
##  Median :70.38   Median :213.0   Median : 9.250   Median :31.00  
##  Mean   :70.43   Mean   :212.1   Mean   : 9.237   Mean   :30.88  
##  3rd Qu.:71.75   3rd Qu.:222.0   3rd Qu.: 9.500   3rd Qu.:31.63  
##  Max.   :75.00   Max.   :259.0   Max.   :10.500   Max.   :33.75  
##                                  NA's   :1        NA's   :1      
##   X40_Yard_sec    Bench_Press     Vert_Leap_In   Broad_Jump_in  
##  Min.   :4.280   Min.   : 5.00   Min.   :27.00   Min.   :106.0  
##  1st Qu.:4.490   1st Qu.:16.00   1st Qu.:32.00   1st Qu.:116.0  
##  Median :4.560   Median :19.00   Median :34.50   Median :120.0  
##  Mean   :4.558   Mean   :18.79   Mean   :34.55   Mean   :119.7  
##  3rd Qu.:4.630   3rd Qu.:22.00   3rd Qu.:36.50   3rd Qu.:123.0  
##  Max.   :4.850   Max.   :34.00   Max.   :42.50   Max.   :135.0  
##  NA's   :14      NA's   :28      NA's   :16      NA's   :25     
##  Shuttle_Shuttle     X3Cone          Team              Round          
##  Min.   :3.900   Min.   :6.570   Length:277         Length:277        
##  1st Qu.:4.225   1st Qu.:6.980   Class :character   Class :character  
##  Median :4.320   Median :7.110   Mode  :character   Mode  :character  
##  Mean   :4.326   Mean   :7.101                                        
##  3rd Qu.:4.420   3rd Qu.:7.220                                        
##  Max.   :4.630   Max.   :7.680                                        
##  NA's   :46      NA's   :58                                           
##  Draft_Order        Rush_Attempt_pg         GP            Total_Yrd_pg      
##  Length:277         Length:277         Length:277         Length:277        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       TD              Rush_PA          Rush_Yds_Gm         Total_Yds        
##  Length:277         Length:277         Length:277         Length:277        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##

cols.num <- c("Draft_Order","RushYard_Per_Game", "GP", "TD", "Total_Yrd_pg")
df[cols.num] <- sapply(df[cols.num],as.numeric)

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

sapply(df, class)

cols.num <- c("Year")
df[cols.num] <- sapply(df[cols.num],as.factor)
sapply(df, class)

cols.num <- c("Draft_Order","Rush_Attempt_pg", "GP", "TD", "Rush_Yds_Gm","Total_Yrd_pg","Rush_PA", "Total_Yds")
df1[cols.num] <- sapply(df1[cols.num],as.numeric)

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

sapply(df1, class)

Graphics

chart=ggplot(data=df, aes(x=Draft_Order, y=Total_Yrd_pg))+
geom_point(color="#69b3a2")+
geom_smooth(method='lm')
chart

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 177 rows containing non-finite values (stat_smooth).

## Warning: Removed 177 rows containing missing values (geom_point).

chart=ggplot(data=df, aes(x=X40_Yard_sec, y=Weight_lbs))+
geom_point(color="#69b3a2")+
geom_smooth(method='lm')
chart

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 14 rows containing non-finite values (stat_smooth).

## Warning: Removed 14 rows containing missing values (geom_point).

chart=ggplot(data=df, aes(x=Draft_Order, y=X40_Yard_sec))+
geom_point(color="#69b3a2")+
geom_smooth(method='lm')
chart

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 162 rows containing non-finite values (stat_smooth).

## Warning: Removed 162 rows containing missing values (geom_point).

chart=ggplot(data=df, aes(x=Round, y=Total_Yrd_pg))+
geom_boxplot(color="#69b3a2")+
geom_smooth(method='lm')
chart

## Warning: Removed 130 rows containing non-finite values (stat_boxplot).

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 130 rows containing non-finite values (stat_smooth).

PART 1: ANOVA

It can be assumed that the original data was gathered with proper randomization methods.

Experimental Design How will the experiment be organized and conducted to test the hypothesis?

The experiment that will be conducted in this analysis will be to determine if the variation of Average Yard Per Game can be can be attributed to the variation of Rounds the Players are Drafted in. An analysis of variance with a confidence interval of 95% will be performed to determine if the a running back drafted in Round 1 or Round 2 of will have an effect on his NFL performance.

Hypothesis

The null hypothesis for this ANOVA will be that :The means of Average Yard Per Game in First Round will be equal to the Average yards Per Game of Second Round.

The alternative hypothesis: The means of Average Yard Per Game in First Round will be different to the Average yards Per Game of Second Round.

What is the rationale for this design?

This dataframe in this study contains a single factor with multiple levels. Therefore, an ANOVA is the appropriate test to be performed.

Model

model1=aov(df$Total_Yrd_pg ~ df$Round)
summary(model1)

##              Df Sum Sq Mean Sq F value Pr(>F)    
## df$Round      7  75704   10815   22.26 <2e-16 ***
## Residuals   139  67521     486                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 130 observations deleted due to missingness

TukeyHSD(model1, conf.level=.95)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df$Total_Yrd_pg ~ df$Round)
## 
## $`df$Round`
##              diff        lwr        upr     p adj
## 1-#N/A  72.302572   48.72367  95.881476 0.0000000
## 2-#N/A  54.672192   33.46404  75.880346 0.0000000
## 3-#N/A  33.974946   15.22736  52.722531 0.0000034
## 4-#N/A  15.450038   -2.60295  33.503026 0.1528320
## 5-#N/A  12.925244   -6.21916  32.069648 0.4343749
## 6-#N/A  -4.572765  -27.24735  18.101817 0.9985468
## 7-#N/A   2.244055  -21.33485  25.822960 0.9999905
## 2-1    -17.630379  -46.16174  10.900982 0.5520819
## 3-1    -38.327626  -65.08066 -11.574594 0.0005352
## 4-1    -56.852533  -83.12349 -30.581577 0.0000000
## 5-1    -59.377327  -86.40992 -32.344737 0.0000000
## 6-1    -76.875336 -106.51297 -47.237701 0.0000000
## 7-1    -70.058517 -100.39360 -39.723430 0.0000000
## 3-2    -20.697246  -45.38623   3.991735 0.1721977
## 4-2    -39.222154  -63.38792 -15.056389 0.0000468
## 5-2    -41.746948  -66.73859 -16.755309 0.0000247
## 6-2    -59.244957  -87.03363 -31.456281 0.0000000
## 7-2    -52.428137  -80.95950 -23.896776 0.0000023
## 4-3    -18.524907  -40.56284   3.513022 0.1695852
## 5-3    -21.049702  -43.99024   1.890834 0.0974442
## 6-3    -38.547710  -64.50723 -12.588191 0.0002805
## 7-3    -31.730891  -58.48392  -4.977859 0.0086282
## 5-4     -2.524794  -24.90127  19.851680 0.9999696
## 6-4    -20.022803  -45.48523   5.439621 0.2400044
## 7-4    -13.205983  -39.47694  13.064973 0.7803895
## 6-5    -17.498009  -43.74554   8.749522 0.4513996
## 7-5    -10.681189  -37.71378  16.351402 0.9259404
## 7-6      6.816820  -22.82082  36.454455 0.9966517

As P value for difference between Round 1 and 2 is not <.05, we fail to reject null hupothesis. There is no statistical difference between a running back drafted in round 1 and 2.

plot(TukeyHSD(model1, conf.level=.95), las = 2)

The conclusion therefore is your team should not draft running back in first round.

Diagnostics/Model Assumptions Checking

(Q-Q) plots are graphs used to verify the distributional assumption for a set of data. As there is a straight line on a Q-Q plot. We can check this assumption for anova with the assumption that the test statistic follows a normal distribution.

The Normal Q-Q plots for both anova models produced relatively linear relationships between the residual and theoretical values, indicating that the use of ANOVA in this model was appropriate.

qqnorm(residuals(model1), main="Normal Q-Q Plot for NFL Player Weight", ylab="Total_Yard_pg Residuals")
qqline(residuals(model1))

A Residuals vs. Fits Plot is a common graph used in residual analysis. It is a scatter plot of residuals as a function of fitted values, or the estimated responses. These plots are used to identify linearity, outliers, and error variances. Here we do not see an linearity.

plot(fitted(model1),residuals(model1), main="Total_Yards_pg")

Second Model

First round RBs are not any better at rushing than other round Running Backs

model2=aov(df1$Rush_PA ~ df$Round)
summary(model2)

##             Df Sum Sq Mean Sq F value Pr(>F)
## df$Round     6  1.086  0.1810   1.083  0.399
## Residuals   25  4.179  0.1672               
## 245 observations deleted due to missingness

TukeyHSD(model2, conf.level=.95)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df1$Rush_PA ~ df$Round)
## 
## $`df$Round`
##               diff        lwr       upr     p adj
## 1-#N/A -0.27196296 -1.1442832 0.6003572 0.9497984
## 2-#N/A  0.13142857 -0.7715086 1.0343657 0.9990790
## 3-#N/A -0.20275000 -1.0885953 0.6830953 0.9890697
## 4-#N/A -0.31000000 -1.8209029 1.2009029 0.9938638
## 5-#N/A  0.10416667 -0.9642030 1.1725363 0.9999087
## 7-#N/A -0.51000000 -2.0209029 1.0009029 0.9281641
## 2-1     0.40339153 -0.2560205 1.0628036 0.4639606
## 3-1     0.06921296 -0.5665942 0.7050201 0.9998264
## 4-1    -0.03803704 -1.4172964 1.3412223 0.9999999
## 5-1     0.37612963 -0.4961906 1.2484498 0.8070293
## 7-1    -0.23803704 -1.6172964 1.1412223 0.9976087
## 3-2    -0.33417857 -1.0113815 0.3430243 0.6959847
## 4-2    -0.44142857 -1.8402528 0.9573957 0.9469227
## 5-2    -0.02726190 -0.9301991 0.8756753 0.9999999
## 7-2    -0.64142857 -2.0402528 0.7573957 0.7604841
## 4-3    -0.10725000 -1.4951029 1.2806029 0.9999767
## 5-3     0.30691667 -0.5789287 1.1927620 0.9194878
## 7-3    -0.30725000 -1.6951029 1.0806029 0.9908157
## 5-4     0.41416667 -1.0967362 1.9250696 0.9727851
## 7-4    -0.20000000 -2.0504706 1.6504706 0.9998335
## 7-5    -0.61416667 -2.1250696 0.8967362 0.8451534

plot(TukeyHSD(model2, conf.level=.95), las = 2)

PART 2: Regression:

The multiple regerssion analysis will be performed in four parts:

Data assessment Modeling Assumption Testing Validating model on test data

Data Assessment:

There are no outliers that needs to be removed based on statistics summary.

Modeling

Interestingly none of the Variables have a P value less than .05 except Draft Order. Which means none of the Combine Attribute can be used as regressors for running back performance except Draft Order.

reg_model =  lm(Total_Yrd_pg ~ Height_in + Weight_lbs + Hand_Size_in + Arm_Length_in + X40_Yard_sec + Bench_Press + Vert_Leap_In + Broad_Jump_in + Shuttle_Shuttle + X3Cone + Draft_Order, data = df) 
summary(reg_model)

## 
## Call:
## lm(formula = Total_Yrd_pg ~ Height_in + Weight_lbs + Hand_Size_in + 
##     Arm_Length_in + X40_Yard_sec + Bench_Press + Vert_Leap_In + 
##     Broad_Jump_in + Shuttle_Shuttle + X3Cone + Draft_Order, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.394 -13.357  -2.109  17.578  65.163 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      45.07811  314.42474   0.143    0.887    
## Height_in        -2.08094    2.75234  -0.756    0.453    
## Weight_lbs       -0.12309    0.40684  -0.303    0.763    
## Hand_Size_in      3.37050    6.77015   0.498    0.621    
## Arm_Length_in     7.10201    4.25516   1.669    0.101    
## X40_Yard_sec      8.80773   41.67023   0.211    0.833    
## Bench_Press      -0.36665    0.77023  -0.476    0.636    
## Vert_Leap_In      1.34889    1.35809   0.993    0.325    
## Broad_Jump_in    -0.64200    0.90781  -0.707    0.482    
## Shuttle_Shuttle  17.43804   25.72572   0.678    0.501    
## X3Cone          -16.32662   21.07976  -0.775    0.442    
## Draft_Order      -0.33719    0.04752  -7.096 2.41e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.35 on 56 degrees of freedom
##   (209 observations deleted due to missingness)
## Multiple R-squared:  0.5398, Adjusted R-squared:  0.4494 
## F-statistic: 5.971 on 11 and 56 DF,  p-value: 2.464e-06

Since Draft Order is the only significant regressor, a simple linear regression can be used.

From the results the R^2 is .488, we can see that 48.8% of the variability in Total Yards Per Game can be explained by Draft Order.

reg_model =  lm(Total_Yrd_pg ~ Draft_Order, data = df) 
summary(reg_model)

## 
## Call:
## lm(formula = Total_Yrd_pg ~ Draft_Order, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.413 -13.096  -3.814  16.403  56.902 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 82.78447    4.67061   17.73  < 2e-16 ***
## Draft_Order -0.32989    0.03411   -9.67  6.3e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.72 on 98 degrees of freedom
##   (177 observations deleted due to missingness)
## Multiple R-squared:  0.4883, Adjusted R-squared:  0.4831 
## F-statistic: 93.51 on 1 and 98 DF,  p-value: 6.299e-16

Lets look at it from different perspective now, all the variables could not predict how a player will do in NFL based on combine performance. But can combine performance predict Draft Order.

reg_model =  lm(Draft_Order ~ Height_in + Weight_lbs + Hand_Size_in + Arm_Length_in + X40_Yard_sec + Bench_Press + Vert_Leap_In + Broad_Jump_in + Shuttle_Shuttle + X3Cone, data = df) 
summary(reg_model)

## 
## Call:
## lm(formula = Draft_Order ~ Height_in + Weight_lbs + Hand_Size_in + 
##     Arm_Length_in + X40_Yard_sec + Bench_Press + Vert_Leap_In + 
##     Broad_Jump_in + Shuttle_Shuttle + X3Cone, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -127.180  -45.053    3.441   47.136  132.178 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     -1107.9913   778.9498  -1.422   0.1595  
## Height_in           7.0178     6.8416   1.026   0.3086  
## Weight_lbs         -2.0272     1.0318  -1.965   0.0535 .
## Hand_Size_in      -14.6222    16.7112  -0.875   0.3847  
## Arm_Length_in      -1.6183    10.6710  -0.152   0.8799  
## X40_Yard_sec      144.7197    99.6076   1.453   0.1509  
## Bench_Press         0.7464     1.9326   0.386   0.7005  
## Vert_Leap_In        2.5891     3.4570   0.749   0.4565  
## Broad_Jump_in      -0.1417     2.3682  -0.060   0.9525  
## Shuttle_Shuttle    70.5447    63.8162   1.105   0.2729  
## X3Cone             45.5625    50.2495   0.907   0.3678  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 64.86 on 68 degrees of freedom
##   (198 observations deleted due to missingness)
## Multiple R-squared:  0.1443, Adjusted R-squared:  0.01842 
## F-statistic: 1.146 on 10 and 68 DF,  p-value: 0.3421

Here we have same result, none of the aggressors have p value below .05, thus the reason for such a low R2.

Backward Elimination Model

BackWard_Elimination_model =  lm(Draft_Order ~ Height_in + Weight_lbs +  X40_Yard_sec + Shuttle_Shuttle + X3Cone, data = df1) 
summary(BackWard_Elimination_model)

## 
## Call:
## lm(formula = Draft_Order ~ Height_in + Weight_lbs + X40_Yard_sec + 
##     Shuttle_Shuttle + X3Cone, data = df1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -122.907  -49.984    0.579   45.251  125.332 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     -415.6297   609.0462  -0.682   0.4969  
## Height_in          1.9823     6.0046   0.330   0.7422  
## Weight_lbs        -1.4921     0.8356  -1.786   0.0779 .
## X40_Yard_sec      52.6023    86.2382   0.610   0.5436  
## Shuttle_Shuttle   41.0685    59.7683   0.687   0.4940  
## X3Cone            45.3174    45.5682   0.994   0.3230  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 63.84 on 80 degrees of freedom
##   (191 observations deleted due to missingness)
## Multiple R-squared:  0.08201,    Adjusted R-squared:  0.02464 
## F-statistic: 1.429 on 5 and 80 DF,  p-value: 0.2227

Conclusion

We can see here linear regression is not a good model to predict NFL performance for running back based on Combine Metrics. There are other variables which are not in our data which has huge impact on Draft Order which are college Performance.

I believe an addition of College Performance along with a decision tree model can be a better predictor of draft order and therefore NFL performance.

Every year, teams fall in love with RB prospects because of their freakish athletic abilities. But they keep forgetting when it comes to draft season is that rushing stops being important at the NFL level. Today is a pass friendly league and teams needs to spend their draft resources on other highly valued skill players such as QB, WR, CB or Edge rushers. Don’t Draft a Running BAck in First Round.

Project

Shariq Mian

12/7/2021