Library

library(dplyr)

Importing Data

# Load the project CSV into a data frame.
ball_data <- read.csv(file = "project_dataset_trackman_pitcher.csv")

# Preview the first ten rows of the raw data.
ball_data[seq_len(10), ]

# Keep only the rows tagged "Fastball", then narrow to the three columns
# used by the summaries, plots and model further down.
Fastball <- dplyr::select(
  dplyr::filter(ball_data, TaggedPitchType == "Fastball"),
  RelSpeed, Inning, SpinRate
)

# Preview the first ten rows of the fastball subset.
Fastball[seq_len(10), ]
# Distribution summary (min, quartiles, median, mean, max) of RelSpeed
# for the fastball subset.
summary(Fastball$RelSpeed)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   84.95   87.32   88.32   88.30   89.33   92.40
# Same distribution summary for SpinRate.
summary(Fastball$SpinRate)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2191    2355    2390    2394    2438    2634
# Print Inning as a factor to inspect its levels. The result is only printed,
# not assigned, so Fastball$Inning itself is left unchanged by this line.
as.factor(Fastball$Inning)
##   [1] 4 3 3 4 6 6 2 1 3 4 6 5 2 5 5 5 2 5 2 2 3 4 5 8 6 5 3 3 1 5 5 3 7 7 7 1 1
##  [38] 7 4 5 2 7 7 4 4 1 2 1 7 2 3 3 2 3 2 6 7 1 2 1 1 5 7 5 7 6 5 4 3 7 2 4 3 5
##  [75] 3 2 2 6 5 3 6 4 6 6 4 1 8 4 1 3 6 1 6 6 1 1 2 1 3 1 6 1 4 4 2 7 7 2 5 2 1
## [112] 7 4 8 1 7 1 1 1 3 2 2 1 6 4 7 3 8 7 5 1 4 2 2 2 4 7 5 3 4 1 6 2 4 1 5 2 1
## [149] 3 2 4 2 6 3 1 5 2 4 5 5 1 2 2 1 1 4 6 1 2 1 4 6 5 1 5 1 2 2 1 2 3 2 5 6 5
## [186] 2 4 5 5 2 4 7 7 1 5 1 4 5 5 5 6 2 6 4 7 6 2 2 3 1 5 2 5 2 3 6 5 5 4 3 2 4
## [223] 7 6 6 4 6 1 1 2 4 3 2 5 1 4 3 3 2 5 4 2 1 7 2 1 4 4 4 2 7 5 1 2 5 2 1 3 4
## [260] 5 1 1 7 8 4 4 4 3 4 5 5 5 2 3 6 2 4 2 2
## Levels: 1 2 3 4 5 6 7 8

Plots

# Scatter plot of RelSpeed against SpinRate for the fastball subset.
plot(
  x = Fastball$SpinRate,
  y = Fastball$RelSpeed,
  xlab = "Spin Rate",
  ylab = "Speed"
)

# Scatter plot of RelSpeed against Inning for the fastball subset.
plot(
  x = Fastball$Inning,
  y = Fastball$RelSpeed,
  xlab = "Inning",
  ylab = "Speed"
)

Fastball Speed Linear Model

# Linear model of RelSpeed on SpinRate plus Inning, with Inning converted to
# a factor inside the formula so each inning gets its own coefficient.
lm.fast_vel <- lm(
  formula = RelSpeed ~ SpinRate + as.factor(Inning),
  data = Fastball
)

# ANOVA table for the fitted model.
anova(lm.fast_vel)

# Coefficient estimates and overall fit statistics.
summary(lm.fast_vel)
## 
## Call:
## lm(formula = RelSpeed ~ SpinRate + as.factor(Inning), data = Fastball)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2911 -0.7938  0.0784  0.7468  2.8583 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        72.442901   2.554647  28.357  < 2e-16 ***
## SpinRate            0.007083   0.001062   6.669 1.45e-10 ***
## as.factor(Inning)2 -0.860320   0.226104  -3.805 0.000175 ***
## as.factor(Inning)3 -1.019794   0.263683  -3.867 0.000138 ***
## as.factor(Inning)4 -0.982709   0.240589  -4.085 5.82e-05 ***
## as.factor(Inning)5 -1.613491   0.242211  -6.662 1.51e-10 ***
## as.factor(Inning)6 -1.922226   0.272348  -7.058 1.43e-11 ***
## as.factor(Inning)7 -2.066792   0.282910  -7.305 3.12e-12 ***
## as.factor(Inning)8 -2.023634   0.545176  -3.712 0.000250 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.144 on 270 degrees of freedom
## Multiple R-squared:  0.3789, Adjusted R-squared:  0.3605 
## F-statistic: 20.59 on 8 and 270 DF,  p-value: < 2.2e-16

Model Testing

# Observed vs. fitted values, taken from the rows the model actually used.
# lm() drops rows with missing values by default, so reading the response
# from the model frame (rather than from Fastball) keeps the observed and
# fitted vectors the same length and in the same order.
observed_speed <- model.response(model.frame(lm.fast_vel))
fitted_speed <- fitted(lm.fast_vel)

plot(x = observed_speed,
     y = fitted_speed,
     xlab = "True Value",
     ylab = "Estimated Value")
# Identity line (intercept 0, slope 1): points on it are fitted exactly.
abline(a = 0, b = 1, col = "blue")

# In-sample mean squared error: the mean of the squared residuals, i.e. the
# residual sum of squares divided by the number of fitted rows (not by the
# residual degrees of freedom).
mse <- mean(residuals(lm.fast_vel)^2)

mse
## [1] 1.266342
# Adjusted R-squared extracted from the model summary.
summary(lm.fast_vel)$adj.r.squared
## [1] 0.3604633