# Read the team-season data from baseball.csv (resolved against the current
# working directory) and display each column's type and leading values.
baseball <- read.csv(file = "baseball.csv")
str(baseball)
## 'data.frame':    1232 obs. of  15 variables:
##  $ Team        : chr  "ARI" "ATL" "BAL" "BOS" ...
##  $ League      : chr  "NL" "NL" "AL" "AL" ...
##  $ Year        : int  2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 ...
##  $ RS          : int  734 700 712 734 613 748 669 667 758 726 ...
##  $ RA          : int  688 600 705 806 759 676 588 845 890 670 ...
##  $ W           : int  81 94 93 69 61 85 97 68 64 88 ...
##  $ OBP         : num  0.328 0.32 0.311 0.315 0.302 0.318 0.315 0.324 0.33 0.335 ...
##  $ SLG         : num  0.418 0.389 0.417 0.415 0.378 0.422 0.411 0.381 0.436 0.422 ...
##  $ BA          : num  0.259 0.247 0.247 0.26 0.24 0.255 0.251 0.251 0.274 0.268 ...
##  $ Playoffs    : int  0 1 1 0 0 0 1 0 0 1 ...
##  $ RankSeason  : int  NA 4 5 NA NA NA 2 NA NA 6 ...
##  $ RankPlayoffs: int  NA 5 4 NA NA NA 4 NA NA 2 ...
##  $ G           : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ OOBP        : num  0.317 0.306 0.315 0.331 0.335 0.319 0.305 0.336 0.357 0.314 ...
##  $ OSLG        : num  0.415 0.378 0.403 0.428 0.424 0.405 0.39 0.43 0.47 0.402 ...
# Number of rows in the full data set.
nrow(baseball)
## [1] 1232
# Number of rows recorded for each value of Year.
table(baseball[["Year"]])
## 
## 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1973 1974 1975 1976 1977 1978 
##   20   20   20   20   20   20   20   24   24   24   24   24   24   24   26   26 
## 1979 1980 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1996 1997 
##   26   26   26   26   26   26   26   26   26   26   26   26   26   28   28   28 
## 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 
##   30   30   30   30   30   30   30   30   30   30   30   30   30   30   30
# Number of distinct Year values present (one table cell per year).
length(table(baseball[["Year"]]))
## [1] 47
# Keep only the rows whose Playoffs flag equals 1. which() discards rows where
# the comparison is NA, which is the same rule subset() applies.
baseball <- baseball[which(baseball$Playoffs == 1), , drop = FALSE]
nrow(baseball)
## [1] 244
# Rows remaining per Year after the filter.
table(baseball[["Year"]])
## 
## 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1973 1974 1975 1976 1977 1978 
##    2    2    2    2    2    2    2    4    4    4    4    4    4    4    4    4 
## 1979 1980 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1996 1997 
##    4    4    4    4    4    4    4    4    4    4    4    4    4    4    8    8 
## 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 
##    8    8    8    8    8    8    8    8    8    8    8    8    8    8   10
# Tabulating the per-year counts themselves: how many years had each count.
table(table(baseball[["Year"]]))
## 
##  2  4  8 10 
##  7 23 16  1
# Store the per-Year row counts of the filtered data so they can be looked up
# by year later in the script.
PlayoffTable <- table(baseball[["Year"]])
PlayoffTable
## 
## 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1973 1974 1975 1976 1977 1978 
##    2    2    2    2    2    2    2    4    4    4    4    4    4    4    4    4 
## 1979 1980 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1996 1997 
##    4    4    4    4    4    4    4    4    4    4    4    4    4    4    8    8 
## 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 
##    8    8    8    8    8    8    8    8    8    8    8    8    8    8   10
# The table is labelled by the years as character strings, not integers ...
str(names(PlayoffTable))
##  chr [1:47] "1962" "1963" "1964" "1965" "1966" "1967" "1968" "1969" "1970" ...
# ... so a given year's count is retrieved by indexing with the quoted year.
PlayoffTable[c("1990", "2001")]
## 
## 1990 2001 
##    4    8
# For every row, look up how many rows share its Year in PlayoffTable and
# store that count as NumCompetitors.
#
# The index must be the year as a character string: PlayoffTable is labelled
# by name, and an integer such as 2012 would be taken as a position.
#
# as.integer() is applied because the raw lookup result is a 1-d "table"
# object. Assigning it with `$<-` removes only its names, so the column would
# keep the "table" class and a dim attribute. Converting here gives a plain
# integer vector with the same values.
baseball$NumCompetitors <- as.integer(
  PlayoffTable[as.character(baseball$Year)]
)
baseball$NumCompetitors
##   [1] 10 10 10 10 10 10 10 10 10 10  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
##  [26]  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
##  [51]  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
##  [76]  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
## [101]  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
## [126]  8  8  8  8  8  8  8  8  8  8  8  8  8  4  4  4  4  4  4  4  4  4  4  4  4
## [151]  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4
## [176]  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4
## [201]  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4
## [226]  4  4  4  4  4  2  2  2  2  2  2  2  2  2  2  2  2  2  2
# Number of rows at each NumCompetitors value.
table(baseball[["NumCompetitors"]])
## 
##   2   4   8  10 
##  14  92 128  10
# Binary outcome column: 1 where RankPlayoffs equals 1, 0 otherwise
# (presumably marking the World Series winner, going by the column name).
baseball$WorldSeries <- as.numeric(baseball[["RankPlayoffs"]] == 1)
table(baseball[["WorldSeries"]])
## 
##   0   1 
## 197  47
# Single-predictor logistic regression (binomial GLM): WorldSeries on Year.
model1 <- glm(
  WorldSeries ~ Year,
  family = "binomial",
  data = baseball
)
summary(model1)
## 
## Call:
## glm(formula = WorldSeries ~ Year, family = "binomial", data = baseball)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) 72.23602   22.64409    3.19  0.00142 **
## Year        -0.03700    0.01138   -3.25  0.00115 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 228.35  on 242  degrees of freedom
## AIC: 232.35
## 
## Number of Fisher Scoring iterations: 4
# Single-predictor logistic regression (binomial GLM): WorldSeries on RS.
model2 <- glm(
  WorldSeries ~ RS,
  family = "binomial",
  data = baseball
)
summary(model2)
## 
## Call:
## glm(formula = WorldSeries ~ RS, family = "binomial", data = baseball)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.661226   1.636494   0.404    0.686
## RS          -0.002681   0.002098  -1.278    0.201
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 237.45  on 242  degrees of freedom
## AIC: 241.45
## 
## Number of Fisher Scoring iterations: 4
# Single-predictor logistic regression (binomial GLM): WorldSeries on RA.
model3 <- glm(
  WorldSeries ~ RA,
  family = "binomial",
  data = baseball
)
summary(model3)
## 
## Call:
## glm(formula = WorldSeries ~ RA, family = "binomial", data = baseball)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  1.888174   1.483831   1.272   0.2032  
## RA          -0.005053   0.002273  -2.223   0.0262 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 233.88  on 242  degrees of freedom
## AIC: 237.88
## 
## Number of Fisher Scoring iterations: 4
# Single-predictor logistic regression (binomial GLM): WorldSeries on W.
model4 <- glm(
  WorldSeries ~ W,
  family = "binomial",
  data = baseball
)
summary(model4)
## 
## Call:
## glm(formula = WorldSeries ~ W, family = "binomial", data = baseball)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -6.85568    2.87620  -2.384   0.0171 *
## W            0.05671    0.02988   1.898   0.0577 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 235.51  on 242  degrees of freedom
## AIC: 239.51
## 
## Number of Fisher Scoring iterations: 4
# Single-predictor logistic regression (binomial GLM): WorldSeries on OBP.
model5 <- glm(
  WorldSeries ~ OBP,
  family = "binomial",
  data = baseball
)
summary(model5)
## 
## Call:
## glm(formula = WorldSeries ~ OBP, family = "binomial", data = baseball)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)    2.741      3.989   0.687    0.492
## OBP          -12.402     11.865  -1.045    0.296
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 238.02  on 242  degrees of freedom
## AIC: 242.02
## 
## Number of Fisher Scoring iterations: 4
# Single-predictor logistic regression (binomial GLM): WorldSeries on SLG.
model6 <- glm(
  WorldSeries ~ SLG,
  family = "binomial",
  data = baseball
)
summary(model6)
## 
## Call:
## glm(formula = WorldSeries ~ SLG, family = "binomial", data = baseball)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)    3.200      2.358   1.357   0.1748  
## SLG          -11.130      5.689  -1.956   0.0504 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 235.23  on 242  degrees of freedom
## AIC: 239.23
## 
## Number of Fisher Scoring iterations: 4
# Single-predictor logistic regression (binomial GLM): WorldSeries on BA.
model7 <- glm(
  WorldSeries ~ BA,
  family = "binomial",
  data = baseball
)
summary(model7)
## 
## Call:
## glm(formula = WorldSeries ~ BA, family = "binomial", data = baseball)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.6392     3.8988  -0.164    0.870
## BA           -2.9765    14.6123  -0.204    0.839
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 239.08  on 242  degrees of freedom
## AIC: 243.08
## 
## Number of Fisher Scoring iterations: 4
# Single-predictor logistic regression (binomial GLM): WorldSeries on
# RankSeason.
model8 <- glm(
  WorldSeries ~ RankSeason,
  family = "binomial",
  data = baseball
)
summary(model8)
## 
## Call:
## glm(formula = WorldSeries ~ RankSeason, family = "binomial", 
##     data = baseball)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -0.8256     0.3268  -2.527   0.0115 *
## RankSeason   -0.2069     0.1027  -2.016   0.0438 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 234.75  on 242  degrees of freedom
## AIC: 238.75
## 
## Number of Fisher Scoring iterations: 4
# Single-predictor logistic regression (binomial GLM): WorldSeries on OOBP.
# Rows with a missing OOBP are left out of the fit by glm's default NA
# handling; the recorded summary reports how many were dropped.
model9 <- glm(
  WorldSeries ~ OOBP,
  family = "binomial",
  data = baseball
)
summary(model9)
## 
## Call:
## glm(formula = WorldSeries ~ OOBP, family = "binomial", data = baseball)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.9306     8.3728  -0.111    0.912
## OOBP         -3.2233    26.0587  -0.124    0.902
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 84.926  on 113  degrees of freedom
## Residual deviance: 84.910  on 112  degrees of freedom
##   (130 observations deleted due to missingness)
## AIC: 88.91
## 
## Number of Fisher Scoring iterations: 4
# Single-predictor logistic regression (binomial GLM): WorldSeries on OSLG.
# Rows with a missing OSLG are left out of the fit by glm's default NA
# handling; the recorded summary reports how many were dropped.
model10 <- glm(
  WorldSeries ~ OSLG,
  family = "binomial",
  data = baseball
)
summary(model10)
## 
## Call:
## glm(formula = WorldSeries ~ OSLG, family = "binomial", data = baseball)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.08725    6.07285  -0.014    0.989
## OSLG        -4.65992   15.06881  -0.309    0.757
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 84.926  on 113  degrees of freedom
## Residual deviance: 84.830  on 112  degrees of freedom
##   (130 observations deleted due to missingness)
## AIC: 88.83
## 
## Number of Fisher Scoring iterations: 4
# Single-predictor logistic regression (binomial GLM): WorldSeries on
# NumCompetitors.
model11 <- glm(
  WorldSeries ~ NumCompetitors,
  family = "binomial",
  data = baseball
)
summary(model11)
## 
## Call:
## glm(formula = WorldSeries ~ NumCompetitors, family = "binomial", 
##     data = baseball)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     0.03868    0.43750   0.088 0.929559    
## NumCompetitors -0.25220    0.07422  -3.398 0.000678 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 226.96  on 242  degrees of freedom
## AIC: 230.96
## 
## Number of Fisher Scoring iterations: 4
# Single-predictor logistic regression (binomial GLM): WorldSeries on League.
# League is a character column, so it enters the model as a categorical
# predictor (the recorded summary shows a single LeagueNL coefficient).
model12 <- glm(
  WorldSeries ~ League,
  family = "binomial",
  data = baseball
)
summary(model12)
## 
## Call:
## glm(formula = WorldSeries ~ League, family = "binomial", data = baseball)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.3558     0.2243  -6.045  1.5e-09 ***
## LeagueNL     -0.1583     0.3252  -0.487    0.626    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 238.88  on 242  degrees of freedom
## AIC: 242.88
## 
## Number of Fisher Scoring iterations: 4
# Combined logistic regression using Year, RA, RankSeason and NumCompetitors
# together. Each of these had p < 0.05 in its single-predictor fit recorded
# earlier in this file.
LogModel <- glm(
  WorldSeries ~ Year + RA + RankSeason + NumCompetitors,
  family = binomial,
  data = baseball
)
summary(LogModel)
## 
## Call:
## glm(formula = WorldSeries ~ Year + RA + RankSeason + NumCompetitors, 
##     family = binomial, data = baseball)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)    12.5874376 53.6474210   0.235    0.814
## Year           -0.0061425  0.0274665  -0.224    0.823
## RA             -0.0008238  0.0027391  -0.301    0.764
## RankSeason     -0.0685046  0.1203459  -0.569    0.569
## NumCompetitors -0.1794264  0.1815933  -0.988    0.323
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 226.37  on 239  degrees of freedom
## AIC: 236.37
## 
## Number of Fisher Scoring iterations: 4
# Pairwise correlation matrix of the four predictors used in the combined
# model, to check how strongly they overlap with one another.
cor(baseball[, c("Year", "RA", "RankSeason", "NumCompetitors")])
##                     Year        RA RankSeason NumCompetitors
## Year           1.0000000 0.4762422  0.3852191      0.9139548
## RA             0.4762422 1.0000000  0.3991413      0.5136769
## RankSeason     0.3852191 0.3991413  1.0000000      0.4247393
## NumCompetitors 0.9139548 0.5136769  0.4247393      1.0000000
# Two-predictor logistic regression: WorldSeries on Year and RA.
model13 <- glm(
  WorldSeries ~ Year + RA,
  family = binomial,
  data = baseball
)
summary(model13)
## 
## Call:
## glm(formula = WorldSeries ~ Year + RA, family = binomial, data = baseball)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)  
## (Intercept) 63.610741  25.654830   2.479   0.0132 *
## Year        -0.032084   0.013323  -2.408   0.0160 *
## RA          -0.001766   0.002585  -0.683   0.4945  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 227.88  on 241  degrees of freedom
## AIC: 233.88
## 
## Number of Fisher Scoring iterations: 4
# Two-predictor logistic regression: WorldSeries on Year and RankSeason.
model14 <- glm(
  WorldSeries ~ Year + RankSeason,
  family = binomial,
  data = baseball
)
summary(model14)
## 
## Call:
## glm(formula = WorldSeries ~ Year + RankSeason, family = binomial, 
##     data = baseball)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) 63.64855   24.37063   2.612  0.00901 **
## Year        -0.03254    0.01231  -2.643  0.00822 **
## RankSeason  -0.10064    0.11352  -0.887  0.37534   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 227.55  on 241  degrees of freedom
## AIC: 233.55
## 
## Number of Fisher Scoring iterations: 4
# Two-predictor logistic regression: WorldSeries on Year and NumCompetitors.
model15 <- glm(
  WorldSeries ~ Year + NumCompetitors,
  family = binomial,
  data = baseball
)
summary(model15)
## 
## Call:
## glm(formula = WorldSeries ~ Year + NumCompetitors, family = binomial, 
##     data = baseball)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)
## (Intercept)    13.350467  53.481896   0.250    0.803
## Year           -0.006802   0.027328  -0.249    0.803
## NumCompetitors -0.212610   0.175520  -1.211    0.226
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 226.90  on 241  degrees of freedom
## AIC: 232.9
## 
## Number of Fisher Scoring iterations: 4
# Two-predictor logistic regression: WorldSeries on RA and RankSeason.
model16 <- glm(
  WorldSeries ~ RA + RankSeason,
  family = binomial,
  data = baseball
)
summary(model16)
## 
## Call:
## glm(formula = WorldSeries ~ RA + RankSeason, family = binomial, 
##     data = baseball)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)  1.487461   1.506143   0.988    0.323
## RA          -0.003815   0.002441  -1.563    0.118
## RankSeason  -0.140824   0.110908  -1.270    0.204
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 232.22  on 241  degrees of freedom
## AIC: 238.22
## 
## Number of Fisher Scoring iterations: 4
# Two-predictor logistic regression: WorldSeries on RA and NumCompetitors.
model17 <- glm(
  WorldSeries ~ RA + NumCompetitors,
  family = binomial,
  data = baseball
)
summary(model17)
## 
## Call:
## glm(formula = WorldSeries ~ RA + NumCompetitors, family = binomial, 
##     data = baseball)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)   
## (Intercept)     0.716895   1.528736   0.469  0.63911   
## RA             -0.001233   0.002661  -0.463  0.64313   
## NumCompetitors -0.229385   0.088399  -2.595  0.00946 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 226.74  on 241  degrees of freedom
## AIC: 232.74
## 
## Number of Fisher Scoring iterations: 4
# Two-predictor logistic regression: WorldSeries on RankSeason and
# NumCompetitors.
model18 <- glm(
  WorldSeries ~ RankSeason + NumCompetitors,
  family = binomial,
  data = baseball
)
summary(model18)
## 
## Call:
## glm(formula = WorldSeries ~ RankSeason + NumCompetitors, family = binomial, 
##     data = baseball)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)   
## (Intercept)     0.12277    0.45737   0.268  0.78837   
## RankSeason     -0.07697    0.11711  -0.657  0.51102   
## NumCompetitors -0.22784    0.08201  -2.778  0.00546 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 239.12  on 243  degrees of freedom
## Residual deviance: 226.52  on 241  degrees of freedom
## AIC: 232.52
## 
## Number of Fisher Scoring iterations: 4