library(Lahman)
## Warning: package 'Lahman' was built under R version 3.2.3
library(plyr)
## Warning: package 'plyr' was built under R version 3.2.3
library(data.table)
## Warning: package 'data.table' was built under R version 3.2.3
# Load season-level batting records from a local CSV file.
# NOTE(review): the Lahman package loaded above may already provide a
# `Batting` dataset that this assignment masks -- confirm the CSV is the
# intended source.
Batting <- read.csv("lehmon data/Batting.csv")

# Career at-bats per player; rows with a missing AB are skipped in the sum.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
dataframe.AB <- ddply(
  Batting, .(playerID), summarise,
  Career.AB = sum(AB, na.rm = TRUE)
)
# Earlier alternatives, kept for reference:
#dataframe.AB<-aggregate(AB~playerID,Batting,sum)
#BAtting.table <- data.table(Batting)
#dataframe.AB <- BAtting.table[,sum(AB),by=(playerID)]
#BAtting.table <-merge(Batting, dataframe.AB, by="playerID")

# Attach each player's career total to every one of their rows, then keep
# only the rows of players with at least 5000 career at-bats.
Batting <- merge(Batting, dataframe.AB, by = "playerID")
Batting.5000 <- subset(Batting, Career.AB >= 5000)
# Collapse a set of batting rows into a single row of totals.
#
# Args:
#   d: data frame with numeric columns AB, HR and SO.
#
# Returns:
#   A one-row data frame with columns AB, HR and SO, each holding the sum of
#   the corresponding input column with NA values ignored.
ab.hr.so <- function(d) {
  # `TRUE` is spelled out: `T` is a plain variable, and if it were ever
  # rebound (e.g. `T <- 0`) the sums would silently stop dropping NAs.
  # `[[` is used so the column lookup is exact rather than partially matched.
  column.total <- function(column) sum(d[[column]], na.rm = TRUE)

  data.frame(
    AB = column.total("AB"),
    HR = column.total("HR"),
    SO = column.total("SO")
  )
}
# One row of career AB/HR/SO totals per player in the 5000+ at-bat subset.
d.5000 <- ddply(Batting.5000, .(playerID), ab.hr.so)

# Scatter of strikeouts per at-bat against home runs per at-bat, overlaid
# with a lowess smoother. Axis labels are pinned to the expressions that
# plot() would otherwise derive from its arguments.
with(d.5000, {
  hr.rate <- HR / AB
  so.rate <- SO / AB
  plot(hr.rate, so.rate, xlab = "HR/AB", ylab = "SO/AB")
  lines(lowess(hr.rate, so.rate), lwd = 2)
})
We can see a clear positive relationship between home run rate and strikeout rate.