library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages ------------------------------------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.1.0     v purrr   0.3.3
## v tibble  2.1.1     v dplyr   0.8.1
## v tidyr   0.8.0     v stringr 1.4.0
## v readr   1.1.1     v forcats 0.3.0
## Warning: package 'ggplot2' was built under R version 3.5.1
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.3
## -- Conflicts --------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 3.5.2
library(ggpmisc)
## Warning: package 'ggpmisc' was built under R version 3.5.3
## For news about 'ggpmisc', please, see https://www.r4photobiology.info/
# Load the combine results and the receiving statistics.
# stringsAsFactors = FALSE is set explicitly: read.csv() defaulted to
# factors before R 4.0, and as.integer() on a factor returns the level
# codes rather than the printed numbers, which would corrupt Yards below.
combine <- read.csv('Data/WRCombineResults.csv', stringsAsFactors = FALSE)
yards <- read.csv('Data/ReceivingYards.csv', stringsAsFactors = FALSE)

# playerID: whatever follows the last backslash in the Player field.
combine$playerID <- gsub(pattern = '.*\\\\', '', combine$Player)
# Player: drop everything from the first '*' or the first backslash
# onward (and any '+', any character, 'I' sequence), leaving the name.
combine$Player <- gsub(pattern = '(\\*.*)|(\\+.I)|(\\\\.*)', '', combine$Player)
# combine

# Same clean-up for the receiving data.
yards$playerID <- gsub(pattern = '.*\\\\', '', yards$Player)
yards$Player <- gsub(pattern = '(\\*.*)|(\\+.I)|(\\\\.*)', '', yards$Player)

# Collapse the receiving rows to one row of totals per player name.
stats <- yards %>%
  group_by(Player) %>%
  summarise(Receptions = sum(Rec),
            Yards = sum(as.integer(Yds)),
            TDs = sum(TD))

# Inner join on the cleaned player name: only players present in both
# tables are kept.
# NOTE(review): playerID is computed above but the join uses Player, so
# distinct players who share a name would be combined -- confirm intent.
df <- merge(combine, stats, by = 'Player')
# df

40 Yard Dash Time

# Scatter plot of total Yards against 40-yard dash time with a fitted
# lm line; the fitted equation and R^2 are printed on the panel.
df %>%
  select(X40YD, Yards, TDs) %>%
  # Keep only rows where all three selected columns are present.
  filter(complete.cases(.)) %>%
  # x/y are mapped once here; every layer below inherits them.
  ggplot(aes(x = X40YD, y = Yards)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    parse = TRUE
  ) +
  labs(title = 'Yards by 40 Yard Dash Time', x = '40 Yard Dash Time') +
  theme_classic()

The 40-yard dash time has little to no effect on career yards.

# Scatter plot of total TDs against 40-yard dash time with a fitted
# lm line; the fitted equation and R^2 are printed on the panel.
df %>%
  select(X40YD, Yards, TDs) %>%
  # Keep only rows where all three selected columns are present.
  filter(complete.cases(.)) %>%
  # x/y are mapped once here; every layer below inherits them.
  ggplot(aes(x = X40YD, y = TDs)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    parse = TRUE
  ) +
  labs(title = 'TDs by 40 Yard Dash Time', x = '40 Yard Dash Time') +
  theme_classic()

The 40-yard dash time has little to no effect on career touchdowns.

Vertical

# Scatter plot of total Yards against Vertical with a fitted lm line;
# the fitted equation and R^2 are printed on the panel.
df %>%
  select(Vertical, Yards, TDs) %>%
  # Keep only rows where all three selected columns are present.
  filter(complete.cases(.)) %>%
  # x/y are mapped once here; every layer below inherits them.
  ggplot(aes(x = Vertical, y = Yards)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    parse = TRUE
  ) +
  labs(title = 'Yards by Vertical Jump', x = 'Vertical Jump') +
  theme_classic()

Vertical jump has little to no effect on career yards.

# Scatter plot of total TDs against Vertical with a fitted lm line;
# the fitted equation and R^2 are printed on the panel.
df %>%
  select(Vertical, Yards, TDs) %>%
  # Keep only rows where all three selected columns are present.
  filter(complete.cases(.)) %>%
  # x/y are mapped once here; every layer below inherits them.
  ggplot(aes(x = Vertical, y = TDs)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    parse = TRUE
  ) +
  labs(title = 'Touchdowns by Vertical Jump', x = 'Vertical Jump') +
  theme_classic()

Vertical jump has little to no effect on career TDs.

3 Cone Drill

# Scatter plot of total Yards against X3Cone with a fitted lm line;
# the fitted equation and R^2 are printed on the panel.
df %>%
  select(X3Cone, Yards, TDs) %>%
  # Keep only rows where all three selected columns are present.
  filter(complete.cases(.)) %>%
  # x/y are mapped once here; every layer below inherits them.
  ggplot(aes(x = X3Cone, y = Yards)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    parse = TRUE
  ) +
  labs(title = 'Yards by 3 Cone Drill Time', x = '3 Cone Drill Time') +
  theme_classic()

3 Cone Drill Time has little to no effect on career yards.

# Scatter plot of total TDs against X3Cone with a fitted lm line;
# the fitted equation and R^2 are printed on the panel.
df %>%
  select(X3Cone, Yards, TDs) %>%
  # Keep only rows where all three selected columns are present.
  filter(complete.cases(.)) %>%
  # x/y are mapped once here; every layer below inherits them.
  ggplot(aes(x = X3Cone, y = TDs)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    parse = TRUE
  ) +
  labs(title = 'Touchdowns by 3 Cone Drill Time', x = '3 Cone Drill Time') +
  theme_classic()

3 Cone Drill Time has little to no effect on career touchdowns.

Shuttle

# Scatter plot of total Yards against Shuttle with a fitted lm line;
# the fitted equation and R^2 are printed on the panel.
df %>%
  select(Shuttle, Yards, TDs) %>%
  # Keep only rows where all three selected columns are present.
  filter(complete.cases(.)) %>%
  # x/y are mapped once here; every layer below inherits them.
  ggplot(aes(x = Shuttle, y = Yards)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    parse = TRUE
  ) +
  labs(title = 'Yards by Shuttle Time', x = 'Shuttle Time') +
  theme_classic()

Shuttle time has little to no effect on career yards.

# Scatter plot of total TDs against Shuttle with a fitted lm line;
# the fitted equation and R^2 are printed on the panel.
df %>%
  select(Shuttle, Yards, TDs) %>%
  # Keep only rows where all three selected columns are present.
  filter(complete.cases(.)) %>%
  # x/y are mapped once here; every layer below inherits them.
  ggplot(aes(x = Shuttle, y = TDs)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    parse = TRUE
  ) +
  labs(title = 'Touchdowns by Shuttle Time', x = 'Shuttle Time') +
  theme_classic()

Shuttle Drill time has little to no effect on career Touchdowns.

One last try

# Multiple linear regression of total Yards on the four combine
# measurements together (Shuttle, X3Cone, Vertical, X40YD).
# lm() drops rows with a missing value in any model variable; the summary
# reports how many observations were removed.
mod <- lm(Yards ~ Shuttle + X3Cone + Vertical + X40YD, data = df)
# Print coefficient estimates, t-tests, R-squared and the overall F-test.
summary(mod)
## 
## Call:
## lm(formula = Yards ~ Shuttle + X3Cone + Vertical + X40YD, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3023.2 -1961.5 -1246.4   692.6 16691.0 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  2381.81   12495.30   0.191    0.849
## Shuttle       852.67    1491.89   0.572    0.568
## X3Cone        897.92    1047.89   0.857    0.392
## Vertical       14.77      68.05   0.217    0.828
## X40YD       -2334.18    2237.39  -1.043    0.298
## 
## Residual standard error: 3175 on 277 degrees of freedom
##   (195 observations deleted due to missingness)
## Multiple R-squared:  0.009051,   Adjusted R-squared:  -0.005259 
## F-statistic: 0.6325 on 4 and 277 DF,  p-value: 0.6397

None of these measurements is a statistically significant predictor of career yards, though the 40-yard dash time comes closest, with a t-value of -1.04 (p = 0.298).

# Same regression with total TDs as the response; this overwrites the
# previous `mod` object.
# lm() drops rows with a missing value in any model variable; the summary
# reports how many observations were removed.
mod <- lm(TDs ~ Shuttle + X3Cone + Vertical + X40YD, data = df)
# Print coefficient estimates, t-tests, R-squared and the overall F-test.
summary(mod)
## 
## Call:
## lm(formula = TDs ~ Shuttle + X3Cone + Vertical + X40YD, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.315 -11.872  -7.971   2.746  88.187 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -5.5956    77.8158  -0.072    0.943
## Shuttle       8.1022     9.2909   0.872    0.384
## X3Cone        6.0006     6.5259   0.920    0.359
## Vertical      0.1721     0.4238   0.406    0.685
## X40YD       -14.0012    13.9336  -1.005    0.316
## 
## Residual standard error: 19.78 on 277 degrees of freedom
##   (195 observations deleted due to missingness)
## Multiple R-squared:  0.01232,    Adjusted R-squared:  -0.001943 
## F-statistic: 0.8637 on 4 and 277 DF,  p-value: 0.4861

None of these measurements is a statistically significant predictor of career TDs, and the model as a whole is not significant either (F-test p = 0.486).