# Read the team-season data from disk and show the column layout.
baseball <- read.csv(file = "baseball.csv")
str(baseball)
## 'data.frame': 1232 obs. of 15 variables:
## $ Team : chr "ARI" "ATL" "BAL" "BOS" ...
## $ League : chr "NL" "NL" "AL" "AL" ...
## $ Year : int 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 ...
## $ RS : int 734 700 712 734 613 748 669 667 758 726 ...
## $ RA : int 688 600 705 806 759 676 588 845 890 670 ...
## $ W : int 81 94 93 69 61 85 97 68 64 88 ...
## $ OBP : num 0.328 0.32 0.311 0.315 0.302 0.318 0.315 0.324 0.33 0.335 ...
## $ SLG : num 0.418 0.389 0.417 0.415 0.378 0.422 0.411 0.381 0.436 0.422 ...
## $ BA : num 0.259 0.247 0.247 0.26 0.24 0.255 0.251 0.251 0.274 0.268 ...
## $ Playoffs : int 0 1 1 0 0 0 1 0 0 1 ...
## $ RankSeason : int NA 4 5 NA NA NA 2 NA NA 6 ...
## $ RankPlayoffs: int NA 5 4 NA NA NA 4 NA NA 2 ...
## $ G : int 162 162 162 162 162 162 162 162 162 162 ...
## $ OOBP : num 0.317 0.306 0.315 0.331 0.335 0.319 0.305 0.336 0.357 0.314 ...
## $ OSLG : num 0.415 0.378 0.403 0.428 0.424 0.405 0.39 0.43 0.47 0.402 ...
# Row count of the full data frame.
dim(baseball)[1L]
## [1] 1232
# Tabulate how many rows each Year value has.
table(baseball[["Year"]])
##
## 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1973 1974 1975 1976 1977 1978
## 20 20 20 20 20 20 20 24 24 24 24 24 24 24 26 26
## 1979 1980 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1996 1997
## 26 26 26 26 26 26 26 26 26 26 26 26 26 28 28 28
## 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
## 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30
# Number of distinct (non-missing) Year values present in the data.
nlevels(factor(baseball[["Year"]]))
## [1] 47
# Keep only rows whose Playoffs flag equals 1 (rows with a missing flag are
# dropped, as subset() would do), then report how many rows remain.
baseball <- baseball[!is.na(baseball$Playoffs) & baseball$Playoffs == 1, ]
dim(baseball)[1L]
## [1] 244
# Rows per Year in the current (filtered) data frame.
table(baseball[["Year"]])
##
## 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1973 1974 1975 1976 1977 1978
## 2 2 2 2 2 2 2 4 4 4 4 4 4 4 4 4
## 1979 1980 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1996 1997
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 8 8
## 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
## 8 8 8 8 8 8 8 8 8 8 8 8 8 8 10
# Distribution of the per-Year row counts: how many years have each count.
table(table(baseball[["Year"]]))
##
## 2 4 8 10
## 7 23 16 1
# Store the per-Year row counts so they can be looked up by year later.
PlayoffTable <- table(baseball[["Year"]])
print(PlayoffTable)
##
## 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1973 1974 1975 1976 1977 1978
## 2 2 2 2 2 2 2 4 4 4 4 4 4 4 4 4
## 1979 1980 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1996 1997
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 8 8
## 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
## 8 8 8 8 8 8 8 8 8 8 8 8 8 8 10
# Inspect the labels of the one-dimensional table.
str(dimnames(PlayoffTable)[[1L]])
## chr [1:47] "1962" "1963" "1964" "1965" "1966" "1967" "1968" "1969" "1970" ...
# Look up the counts for two specific years by their character labels.
PlayoffTable[as.character(c(1990L, 2001L))]
##
## 1990 2001
## 4 8
# For every row, index PlayoffTable by that row's Year (converted to a
# character label) and store the resulting count as a new column.
baseball$NumCompetitors <- PlayoffTable[as.character(baseball[["Year"]])]
baseball$NumCompetitors
## [1] 10 10 10 10 10 10 10 10 10 10 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [26] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [51] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [76] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [101] 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## [126] 8 8 8 8 8 8 8 8 8 8 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4
## [151] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [176] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [201] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [226] 4 4 4 4 4 2 2 2 2 2 2 2 2 2 2 2 2 2 2
# Rows per NumCompetitors value.
table(baseball[["NumCompetitors"]])
##
## 2 4 8 10
## 14 92 128 10
# Binary outcome: 1 when RankPlayoffs equals 1, otherwise 0.
baseball$WorldSeries <- as.numeric(baseball[["RankPlayoffs"]] == 1)
table(baseball[["WorldSeries"]])
##
## 0 1
## 197 47
# Binomial GLM of WorldSeries on Year alone.
model1 <- glm(
  formula = WorldSeries ~ Year,
  family = "binomial",
  data = baseball
)
summary(model1)
##
## Call:
## glm(formula = WorldSeries ~ Year, family = "binomial", data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 72.23602 22.64409 3.19 0.00142 **
## Year -0.03700 0.01138 -3.25 0.00115 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 228.35 on 242 degrees of freedom
## AIC: 232.35
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of WorldSeries on RS alone.
model2 <- glm(
  formula = WorldSeries ~ RS,
  family = "binomial",
  data = baseball
)
summary(model2)
##
## Call:
## glm(formula = WorldSeries ~ RS, family = "binomial", data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.661226 1.636494 0.404 0.686
## RS -0.002681 0.002098 -1.278 0.201
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 237.45 on 242 degrees of freedom
## AIC: 241.45
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of WorldSeries on RA alone.
model3 <- glm(
  formula = WorldSeries ~ RA,
  family = "binomial",
  data = baseball
)
summary(model3)
##
## Call:
## glm(formula = WorldSeries ~ RA, family = "binomial", data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.888174 1.483831 1.272 0.2032
## RA -0.005053 0.002273 -2.223 0.0262 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 233.88 on 242 degrees of freedom
## AIC: 237.88
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of WorldSeries on W alone.
model4 <- glm(
  formula = WorldSeries ~ W,
  family = "binomial",
  data = baseball
)
summary(model4)
##
## Call:
## glm(formula = WorldSeries ~ W, family = "binomial", data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.85568 2.87620 -2.384 0.0171 *
## W 0.05671 0.02988 1.898 0.0577 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 235.51 on 242 degrees of freedom
## AIC: 239.51
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of WorldSeries on OBP alone.
model5 <- glm(
  formula = WorldSeries ~ OBP,
  family = "binomial",
  data = baseball
)
summary(model5)
##
## Call:
## glm(formula = WorldSeries ~ OBP, family = "binomial", data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.741 3.989 0.687 0.492
## OBP -12.402 11.865 -1.045 0.296
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 238.02 on 242 degrees of freedom
## AIC: 242.02
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of WorldSeries on SLG alone.
model6 <- glm(
  formula = WorldSeries ~ SLG,
  family = "binomial",
  data = baseball
)
summary(model6)
##
## Call:
## glm(formula = WorldSeries ~ SLG, family = "binomial", data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.200 2.358 1.357 0.1748
## SLG -11.130 5.689 -1.956 0.0504 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 235.23 on 242 degrees of freedom
## AIC: 239.23
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of WorldSeries on BA alone.
model7 <- glm(
  formula = WorldSeries ~ BA,
  family = "binomial",
  data = baseball
)
summary(model7)
##
## Call:
## glm(formula = WorldSeries ~ BA, family = "binomial", data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6392 3.8988 -0.164 0.870
## BA -2.9765 14.6123 -0.204 0.839
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 239.08 on 242 degrees of freedom
## AIC: 243.08
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of WorldSeries on RankSeason alone.
model8 <- glm(
  formula = WorldSeries ~ RankSeason,
  family = "binomial",
  data = baseball
)
summary(model8)
##
## Call:
## glm(formula = WorldSeries ~ RankSeason, family = "binomial",
## data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.8256 0.3268 -2.527 0.0115 *
## RankSeason -0.2069 0.1027 -2.016 0.0438 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 234.75 on 242 degrees of freedom
## AIC: 238.75
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of WorldSeries on OOBP alone; rows with a missing OOBP are
# dropped by glm's NA handling, so this fit uses fewer observations.
model9 <- glm(
  formula = WorldSeries ~ OOBP,
  family = "binomial",
  data = baseball
)
summary(model9)
##
## Call:
## glm(formula = WorldSeries ~ OOBP, family = "binomial", data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.9306 8.3728 -0.111 0.912
## OOBP -3.2233 26.0587 -0.124 0.902
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 84.926 on 113 degrees of freedom
## Residual deviance: 84.910 on 112 degrees of freedom
## (130 observations deleted due to missingness)
## AIC: 88.91
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of WorldSeries on OSLG alone; rows with a missing OSLG are
# dropped by glm's NA handling, so this fit uses fewer observations.
model10 <- glm(
  formula = WorldSeries ~ OSLG,
  family = "binomial",
  data = baseball
)
summary(model10)
##
## Call:
## glm(formula = WorldSeries ~ OSLG, family = "binomial", data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.08725 6.07285 -0.014 0.989
## OSLG -4.65992 15.06881 -0.309 0.757
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 84.926 on 113 degrees of freedom
## Residual deviance: 84.830 on 112 degrees of freedom
## (130 observations deleted due to missingness)
## AIC: 88.83
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of WorldSeries on NumCompetitors alone.
model11 <- glm(
  formula = WorldSeries ~ NumCompetitors,
  family = "binomial",
  data = baseball
)
summary(model11)
##
## Call:
## glm(formula = WorldSeries ~ NumCompetitors, family = "binomial",
## data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.03868 0.43750 0.088 0.929559
## NumCompetitors -0.25220 0.07422 -3.398 0.000678 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 226.96 on 242 degrees of freedom
## AIC: 230.96
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of WorldSeries on League alone (League is a character column,
# so it enters the model as a categorical predictor).
model12 <- glm(
  formula = WorldSeries ~ League,
  family = "binomial",
  data = baseball
)
summary(model12)
##
## Call:
## glm(formula = WorldSeries ~ League, family = "binomial", data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.3558 0.2243 -6.045 1.5e-09 ***
## LeagueNL -0.1583 0.3252 -0.487 0.626
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 238.88 on 242 degrees of freedom
## AIC: 242.88
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM combining four predictors: Year, RA, RankSeason and
# NumCompetitors.
LogModel <- glm(
  formula = WorldSeries ~ Year + RA + RankSeason + NumCompetitors,
  family = binomial,
  data = baseball
)
summary(LogModel)
##
## Call:
## glm(formula = WorldSeries ~ Year + RA + RankSeason + NumCompetitors,
## family = binomial, data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 12.5874376 53.6474210 0.235 0.814
## Year -0.0061425 0.0274665 -0.224 0.823
## RA -0.0008238 0.0027391 -0.301 0.764
## RankSeason -0.0685046 0.1203459 -0.569 0.569
## NumCompetitors -0.1794264 0.1815933 -0.988 0.323
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 226.37 on 239 degrees of freedom
## AIC: 236.37
##
## Number of Fisher Scoring iterations: 4
# Pairwise correlations among the four predictors used in LogModel.
cor(
  baseball[, c("Year", "RA", "RankSeason", "NumCompetitors"), drop = FALSE]
)
## Year RA RankSeason NumCompetitors
## Year 1.0000000 0.4762422 0.3852191 0.9139548
## RA 0.4762422 1.0000000 0.3991413 0.5136769
## RankSeason 0.3852191 0.3991413 1.0000000 0.4247393
## NumCompetitors 0.9139548 0.5136769 0.4247393 1.0000000
# Two-predictor binomial GLM: Year and RA.
model13 <- glm(
  formula = WorldSeries ~ Year + RA,
  family = binomial,
  data = baseball
)
summary(model13)
##
## Call:
## glm(formula = WorldSeries ~ Year + RA, family = binomial, data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 63.610741 25.654830 2.479 0.0132 *
## Year -0.032084 0.013323 -2.408 0.0160 *
## RA -0.001766 0.002585 -0.683 0.4945
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 227.88 on 241 degrees of freedom
## AIC: 233.88
##
## Number of Fisher Scoring iterations: 4
# Two-predictor binomial GLM: Year and RankSeason.
model14 <- glm(
  formula = WorldSeries ~ Year + RankSeason,
  family = binomial,
  data = baseball
)
summary(model14)
##
## Call:
## glm(formula = WorldSeries ~ Year + RankSeason, family = binomial,
## data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 63.64855 24.37063 2.612 0.00901 **
## Year -0.03254 0.01231 -2.643 0.00822 **
## RankSeason -0.10064 0.11352 -0.887 0.37534
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 227.55 on 241 degrees of freedom
## AIC: 233.55
##
## Number of Fisher Scoring iterations: 4
# Two-predictor binomial GLM: Year and NumCompetitors.
model15 <- glm(
  formula = WorldSeries ~ Year + NumCompetitors,
  family = binomial,
  data = baseball
)
summary(model15)
##
## Call:
## glm(formula = WorldSeries ~ Year + NumCompetitors, family = binomial,
## data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 13.350467 53.481896 0.250 0.803
## Year -0.006802 0.027328 -0.249 0.803
## NumCompetitors -0.212610 0.175520 -1.211 0.226
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 226.90 on 241 degrees of freedom
## AIC: 232.9
##
## Number of Fisher Scoring iterations: 4
# Two-predictor binomial GLM: RA and RankSeason.
model16 <- glm(
  formula = WorldSeries ~ RA + RankSeason,
  family = binomial,
  data = baseball
)
summary(model16)
##
## Call:
## glm(formula = WorldSeries ~ RA + RankSeason, family = binomial,
## data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.487461 1.506143 0.988 0.323
## RA -0.003815 0.002441 -1.563 0.118
## RankSeason -0.140824 0.110908 -1.270 0.204
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 232.22 on 241 degrees of freedom
## AIC: 238.22
##
## Number of Fisher Scoring iterations: 4
# Two-predictor binomial GLM: RA and NumCompetitors.
model17 <- glm(
  formula = WorldSeries ~ RA + NumCompetitors,
  family = binomial,
  data = baseball
)
summary(model17)
##
## Call:
## glm(formula = WorldSeries ~ RA + NumCompetitors, family = binomial,
## data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.716895 1.528736 0.469 0.63911
## RA -0.001233 0.002661 -0.463 0.64313
## NumCompetitors -0.229385 0.088399 -2.595 0.00946 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 226.74 on 241 degrees of freedom
## AIC: 232.74
##
## Number of Fisher Scoring iterations: 4
# Two-predictor binomial GLM: RankSeason and NumCompetitors.
model18 <- glm(
  formula = WorldSeries ~ RankSeason + NumCompetitors,
  family = binomial,
  data = baseball
)
summary(model18)
##
## Call:
## glm(formula = WorldSeries ~ RankSeason + NumCompetitors, family = binomial,
## data = baseball)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.12277 0.45737 0.268 0.78837
## RankSeason -0.07697 0.11711 -0.657 0.51102
## NumCompetitors -0.22784 0.08201 -2.778 0.00546 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 239.12 on 243 degrees of freedom
## Residual deviance: 226.52 on 241 degrees of freedom
## AIC: 232.52
##
## Number of Fisher Scoring iterations: 4