library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages ------------------------------------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.3.3
## v tibble 2.1.1 v dplyr 0.8.1
## v tidyr 0.8.0 v stringr 1.4.0
## v readr 1.1.1 v forcats 0.3.0
## Warning: package 'ggplot2' was built under R version 3.5.1
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.3
## -- Conflicts --------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 3.5.2
library(ggpmisc)
## Warning: package 'ggpmisc' was built under R version 3.5.3
## For news about 'ggpmisc', please, see https://www.r4photobiology.info/
# Load the raw exports: one table of combine results and one table of
# receiving rows. The receiving table is summed per player further down.
combine <- read.csv('Data/WRCombineResults.csv')
yards <- read.csv('Data/ReceivingYards.csv')

# The Player column appears to have the form "Name[*][+]\playerID"
# (TODO confirm against the CSVs). Two patterns split it apart:
#   id_prefix   - everything up to and including the last backslash, so
#                 removing it leaves only the trailing ID;
#   name_suffix - the first '*', '+' or backslash and everything after it, so
#                 removing it leaves only the display name.
# The previous name pattern used '\\+.I', which deleted just three characters
# ("+", any character, "I") and therefore either left a trailing "+" on the
# name or glued the remainder of the ID onto it.
id_prefix <- '.*\\\\'
name_suffix <- '(\\*|\\+|\\\\).*'

combine$playerID <- gsub(pattern = id_prefix, '', combine$Player)
combine$Player <- gsub(pattern = name_suffix, '', combine$Player)
# combine

yards$playerID <- gsub(pattern = id_prefix, '', yards$Player)
yards$Player <- gsub(pattern = name_suffix, '', yards$Player)

# Collapse the receiving rows to one row of totals per player name.
# Yds goes through as.character() first: as.integer() on a factor returns the
# internal level codes rather than the printed numbers, while the
# character round-trip is correct for factor, character and numeric input.
stats <- yards %>%
  group_by(Player) %>%
  summarise(Receptions = sum(Rec),
            Yards = sum(as.integer(as.character(Yds))),
            TDs = sum(TD))

# Inner join on the cleaned display name; combine rows without a matching
# receiving total (and vice versa) are dropped by merge()'s defaults.
# NOTE(review): playerID is derived for both tables but is not used for the
# grouping or the join, so players who share a display name are pooled.
# Confirm the IDs agree across the two files before switching keys.
df <- merge(combine, stats, by = 'Player')
# df
# Scatter of per-player Yards against 40-yard dash time with a least-squares
# line; the fitted equation and R^2 are printed on the panel.
# Rows with an NA in any of the three selected columns (TDs included) are
# removed before plotting.
df %>%
  select(X40YD, Yards, TDs) %>%
  drop_na() %>%
  ggplot(aes(x = X40YD, y = Yards)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    mapping = aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    parse = TRUE
  ) +
  labs(title = 'Yards by 40 Yard Dash Time', x = '40 Yard Dash Time') +
  theme_classic()
The 40-yard dash time shows little to no linear relationship with career yards.
# Scatter of per-player TDs against 40-yard dash time with a least-squares
# line; the fitted equation and R^2 are printed on the panel.
# Rows with an NA in any of the three selected columns (Yards included) are
# removed before plotting.
df %>%
  select(X40YD, Yards, TDs) %>%
  drop_na() %>%
  ggplot(aes(x = X40YD, y = TDs)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    mapping = aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    parse = TRUE
  ) +
  labs(title = 'TDs by 40 Yard Dash Time', x = '40 Yard Dash Time') +
  theme_classic()
The 40-yard dash time shows little to no linear relationship with career touchdowns.
# Scatter of per-player Yards against vertical jump with a least-squares
# line; the fitted equation and R^2 are printed on the panel.
# Rows with an NA in any of the three selected columns (TDs included) are
# removed before plotting.
df %>%
  select(Vertical, Yards, TDs) %>%
  drop_na() %>%
  ggplot(aes(x = Vertical, y = Yards)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    mapping = aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    parse = TRUE
  ) +
  labs(title = 'Yards by Vertical Jump', x = 'Vertical Jump') +
  theme_classic()
Vertical jump shows little to no linear relationship with career yards.
# Scatter of per-player TDs against vertical jump with a least-squares
# line; the fitted equation and R^2 are printed on the panel.
# Rows with an NA in any of the three selected columns (Yards included) are
# removed before plotting.
df %>%
  select(Vertical, Yards, TDs) %>%
  drop_na() %>%
  ggplot(aes(x = Vertical, y = TDs)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    mapping = aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    parse = TRUE
  ) +
  labs(title = 'Touchdowns by Vertical Jump', x = 'Vertical Jump') +
  theme_classic()
Vertical jump shows little to no linear relationship with career touchdowns.
# Scatter of per-player Yards against 3-cone drill time with a least-squares
# line; the fitted equation and R^2 are printed on the panel.
# Rows with an NA in any of the three selected columns (TDs included) are
# removed before plotting.
df %>%
  select(X3Cone, Yards, TDs) %>%
  drop_na() %>%
  ggplot(aes(x = X3Cone, y = Yards)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    mapping = aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    parse = TRUE
  ) +
  labs(title = 'Yards by 3 Cone Drill Time', x = '3 Cone Drill Time') +
  theme_classic()
The 3-cone drill time shows little to no linear relationship with career yards.
# Scatter of per-player TDs against 3-cone drill time with a least-squares
# line; the fitted equation and R^2 are printed on the panel.
# Rows with an NA in any of the three selected columns (Yards included) are
# removed before plotting.
df %>%
  select(X3Cone, Yards, TDs) %>%
  drop_na() %>%
  ggplot(aes(x = X3Cone, y = TDs)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    mapping = aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    parse = TRUE
  ) +
  labs(title = 'Touchdowns by 3 Cone Drill Time', x = '3 Cone Drill Time') +
  theme_classic()
The 3-cone drill time shows little to no linear relationship with career touchdowns.
# Scatter of per-player Yards against shuttle time with a least-squares
# line; the fitted equation and R^2 are printed on the panel.
# Rows with an NA in any of the three selected columns (TDs included) are
# removed before plotting.
df %>%
  select(Shuttle, Yards, TDs) %>%
  drop_na() %>%
  ggplot(aes(x = Shuttle, y = Yards)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    mapping = aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    parse = TRUE
  ) +
  labs(title = 'Yards by Shuttle Time', x = 'Shuttle Time') +
  theme_classic()
Shuttle time shows little to no linear relationship with career yards.
# Scatter of per-player TDs against shuttle time with a least-squares
# line; the fitted equation and R^2 are printed on the panel.
# Rows with an NA in any of the three selected columns (Yards included) are
# removed before plotting.
df %>%
  select(Shuttle, Yards, TDs) %>%
  drop_na() %>%
  ggplot(aes(x = Shuttle, y = TDs)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  stat_poly_eq(
    mapping = aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    formula = y ~ x,
    eq.with.lhs = "italic(hat(y))~`=`~",
    parse = TRUE
  ) +
  labs(title = 'Touchdowns by Shuttle Time', x = 'Shuttle Time') +
  theme_classic()
Shuttle time shows little to no linear relationship with career touchdowns.
# Fit a linear model of per-player Yards on the four combine measurements
# (shuttle, 3-cone, vertical jump, 40-yard dash) and print the coefficient
# table. lm() drops any row with a missing value in a model variable, so the
# fit uses only players who have all four measurements recorded.
mod <- lm(Yards ~ Shuttle + X3Cone + Vertical + X40YD, data = df)
summary(mod)
##
## Call:
## lm(formula = Yards ~ Shuttle + X3Cone + Vertical + X40YD, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3023.2 -1961.5 -1246.4 692.6 16691.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2381.81 12495.30 0.191 0.849
## Shuttle 852.67 1491.89 0.572 0.568
## X3Cone 897.92 1047.89 0.857 0.392
## Vertical 14.77 68.05 0.217 0.828
## X40YD -2334.18 2237.39 -1.043 0.298
##
## Residual standard error: 3175 on 277 degrees of freedom
## (195 observations deleted due to missingness)
## Multiple R-squared: 0.009051, Adjusted R-squared: -0.005259
## F-statistic: 0.6325 on 4 and 277 DF, p-value: 0.6397
None of these predictors is statistically significant for predicting career yards. The 40-yard dash time comes closest, with a t-value of -1.04 (p = 0.298), which is still far from conventional significance levels.
# Fit the same four-predictor linear model with per-player TDs as the
# response and print the coefficient table. This reassigns `mod`, so the
# earlier Yards fit is no longer available after this point.
mod <- lm(TDs ~ Shuttle + X3Cone + Vertical + X40YD, data = df)
summary(mod)
##
## Call:
## lm(formula = TDs ~ Shuttle + X3Cone + Vertical + X40YD, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.315 -11.872 -7.971 2.746 88.187
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.5956 77.8158 -0.072 0.943
## Shuttle 8.1022 9.2909 0.872 0.384
## X3Cone 6.0006 6.5259 0.920 0.359
## Vertical 0.1721 0.4238 0.406 0.685
## X40YD -14.0012 13.9336 -1.005 0.316
##
## Residual standard error: 19.78 on 277 degrees of freedom
## (195 observations deleted due to missingness)
## Multiple R-squared: 0.01232, Adjusted R-squared: -0.001943
## F-statistic: 0.8637 on 4 and 277 DF, p-value: 0.4861
None of these predictors is statistically significant for predicting career TDs, and none comes close: the smallest p-value is 0.316 (40-yard dash time), and the overall F-test has a p-value of 0.486.