library(dplyr); library(ggplot2); library(lme4)

ev <- read.csv('projecting_exit_velocity.csv')

ev <- ev %>% mutate(bbdistance = as.numeric(as.character(bbdistance)),
                    bbangle = as.numeric(as.character(bbangle)),
                    BIPvelocity = as.factor(BIPvelocity),
                    zone = as.factor(zone)
)

ev_ht <- filter(ev, hangtimetemp<=7.3)

ggplot(ev_ht, aes(x=BIPdistance, hangtimetemp, color=bbspeed))+geom_point()+ 
  scale_colour_gradient2(name="Exit Velocity", limits=c(40,120), midpoint=80, low="darkred", high="darkgreen", mid="yellow")+ggtitle("Hang Time Versus Distance")+
  ylab("Hang Time (s)")+xlab("Distance (ft)")+theme(legend.position=c(0.8,0.2))

library(mgcv)
fit <- gam(bbspeed~s(BIPdistance, I(50*hangtimetemp), k=20), data=ev_ht, method="REML")

BIPdistance.gam <- matrix(data=seq(from=0, to=500, length=100), nrow=100, ncol=100)
hangtimetemp.gam <- t(matrix(data=seq(from=0,to=8, length=100), nrow=100, ncol=100))

fitdata.gam <- data.frame(BIPdistance=as.vector(BIPdistance.gam),hangtimetemp=as.vector(hangtimetemp.gam))


fitdata.gam$preds.gam <- predict(fit, fitdata.gam, type="response")
fitdata.gam <- fitdata.gam %>% mutate(preds.gam=as.numeric(preds.gam))
fitdata.gam.filtered <- fitdata.gam %>% filter(preds.gam>=20) %>% filter(preds.gam <= 120) %>% filter(BIPdistance >= 50) 

ggplot(fitdata.gam.filtered, aes(BIPdistance, hangtimetemp, color = preds.gam))+geom_point()+ 
  scale_colour_gradient2(name="Exit Velocity",limits=c(40,120),midpoint=80, low="darkred", high="darkgreen", mid="yellow")+scale_x_continuous(limits=c(0,500))+
  ggtitle("Hang Time Versus Distance (Model)")+
  ylab("Hang Time (s)")+xlab("Distance (ft)")+theme(legend.position=c(0.8,0.2))

Adjusting for Park Effects and Getting Player Averages

ie_bis <- read.csv('ie_bis_join.csv')
playerdata <- read.csv('playerdata.csv')

ie_bis <- ie_bis %>% filter(hangtimetemp<=8)
ie_bis <- ie_bis %>% filter(hangtimetemp>0.2 | BIPdistance/hangtimetemp < 176)
ie_bis$preds.gam.bbspeed <- as.vector(predict(fit, ie_bis, type="response"))

ie_bis <- ie_bis %>% mutate(gamedate2 = as.Date(gamedate))
ie_bis <- ie_bis %>% mutate(season = format(gamedate2,'%Y'))


m <- lmer(preds.gam.bbspeed~(1|batterid)+(1|hometeamid), data=ie_bis)
summary(m)
## Linear mixed model fit by REML ['lmerMod']
## Formula: preds.gam.bbspeed ~ (1 | batterid) + (1 | hometeamid)
##    Data: ie_bis
## 
## REML criterion at convergence: 2022515
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.4978 -0.5632  0.1608  0.7291  5.9501 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  batterid   (Intercept)   5.7977  2.4078 
##  hometeamid (Intercept)   0.6446  0.8029 
##  Residual               116.8205 10.8084 
## Number of obs: 265881, groups:  batterid, 1337; hometeamid, 30
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  88.6871     0.1691   524.3
park.mph.effects <- ranef(m)$hometeamid
batter.mph.effects<- ranef(m)$batterid
park.mph.effects$hometeamid = rownames(park.mph.effects)

colnames(park.mph.effects) <- c("parkeffect", "hometeamid")
ie_bis <- ie_bis %>% mutate(hometeamid = as.character(hometeamid))
ie_bis <- left_join(ie_bis, park.mph.effects, by="hometeamid")
ie_bis <- ie_bis %>% mutate(adj.pred.bbspeed = preds.gam.bbspeed - parkeffect)
mean.bbspeed.player.season <- ie_bis %>% group_by(batterid, season) %>% 
  summarize(mean.pred.adj.bbspeed = mean(adj.pred.bbspeed), n=length(adj.pred.bbspeed))  %>% 
  select(batterid, season, n, mean.pred.adj.bbspeed)

head(mean.bbspeed.player.season)
## Source: local data frame [6 x 4]
## Groups: batterid [4]
## 
##   batterid season     n mean.pred.adj.bbspeed
##      (int)  (chr) (int)                 (dbl)
## 1       19   2012    79              85.31295
## 2       25   2012    83              88.81808
## 3       25   2013    96              86.09953
## 4       25   2014    69              85.78475
## 5       45   2012   182              87.90138
## 6       49   2012     2              85.61702