Project: U.S. Election Data from 1928 to 2016.

The independent variables we are going to use are: Unemployment Rate ,Real GDP, Inflation, consecutive Terms of President Party in office and Reelect .

The response variable is binary logistic regression ( 1 if incumbent party wins and 0 otherwise ) and the second model is percentage of incumbent president electorate vote.

# install.packages("ResourceSelection")
library(ResourceSelection)
## ResourceSelection 0.3-4   2019-01-08
data1 <- read.csv("glmdata1.csv")
head(data1)
##   Election.Year Year Iwin Party Unemployment RealGDP CGDP CInflation
## 1     1929-1933 1928    0     R         0.13 -0.1740 -1.2        0.8
## 2     1933-1937 1932    1     D         0.21  0.1401  5.1        2.9
## 3     1937-1941 1936    1     D         0.02  0.0690 17.7        9.9
## 4     1941-1945 1940    1     D         0.04  0.0450 -1.0        2.2
## 5     1945-1949 1944    1     D         0.03 -0.0120 -0.6        2.1
## 6     1949-1953 1948    1     D         0.04  0.0770  4.7        0.7
##   Inflation    Prctel Reelect Terms
## 1     0.064 0.5875490       0     1
## 2     0.017 0.4085104       1     1
## 3     0.002 0.6245973       1     2
## 4     0.061 0.5498388       1     3
## 5     0.080 0.5377684       1     4
## 6     0.027 0.5231854       1     5
# model 1; predict Iwin based on AVERAGE inflation and AVERAGE gdp 
glm.fit1 <- glm(formula = Iwin~Year+Party+Unemployment+RealGDP+Inflation+Reelect+Terms,family = binomial, data=data1)
summary(glm.fit1)
## 
## Call:
## glm(formula = Iwin ~ Year + Party + Unemployment + RealGDP + 
##     Inflation + Reelect + Terms, family = binomial, data = data1)
## 
## Deviance Residuals: 
##        Min          1Q      Median          3Q         Max  
## -3.971e-06  -3.971e-06   3.971e-06   3.971e-06   3.971e-06  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)   2.557e+01  3.892e+06   0.000        1
## Year         -4.310e-10  1.947e+03   0.000        1
## PartyR       -5.113e+01  1.001e+05  -0.001        1
## Unemployment -3.851e-07  1.273e+06   0.000        1
## RealGDP       1.184e-07  7.707e+05   0.000        1
## Inflation     4.073e-07  1.832e+06   0.000        1
## Reelect       1.106e-07  1.302e+05   0.000        1
## Terms        -8.450e-08  6.608e+04   0.000        1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3.1841e+01  on 22  degrees of freedom
## Residual deviance: 3.6271e-10  on 15  degrees of freedom
## AIC: 16
## 
## Number of Fisher Scoring iterations: 24
# model 2; predict Iwin based on CURRENT inflation and CURRENT gdp 
glm.fit2 <- glm(formula = Iwin~Party+Unemployment+CGDP+CInflation+Reelect+Terms,family = binomial, data=data1)
summary(glm.fit2)
## 
## Call:
## glm(formula = Iwin ~ Party + Unemployment + CGDP + CInflation + 
##     Reelect + Terms, family = binomial, data = data1)
## 
## Deviance Residuals: 
##        Min          1Q      Median          3Q         Max  
## -3.971e-06  -3.971e-06   3.971e-06   3.971e-06   3.971e-06  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)   2.557e+01  1.871e+05   0.000        1
## PartyR       -5.113e+01  9.764e+04  -0.001        1
## Unemployment -2.024e-10  1.231e+06   0.000        1
## CGDP         -6.118e-11  1.491e+04   0.000        1
## CInflation    4.805e-11  2.154e+04   0.000        1
## Reelect      -8.107e-10  1.269e+05   0.000        1
## Terms         6.408e-10  6.347e+04   0.000        1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3.1841e+01  on 22  degrees of freedom
## Residual deviance: 3.6271e-10  on 16  degrees of freedom
## AIC: 14
## 
## Number of Fisher Scoring iterations: 24
# model 3; predict Prctel based on AVERAGE inflation and AVERAGE gdp 
lm.fit1 <- glm(formula = Prctel~Year+Party+Unemployment+RealGDP+Inflation+Reelect+Terms, data=data1)
summary(lm.fit1)
## 
## Call:
## glm(formula = Prctel ~ Year + Party + Unemployment + RealGDP + 
##     Inflation + Reelect + Terms, data = data1)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.07178  -0.03521   0.00102   0.02423   0.07696  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   1.4045210  0.9111604   1.541   0.1440  
## Year         -0.0004600  0.0004559  -1.009   0.3290  
## PartyR        0.0257977  0.0234292   1.101   0.2882  
## Unemployment -0.4713065  0.2979969  -1.582   0.1346  
## RealGDP      -0.2568946  0.1804258  -1.424   0.1750  
## Inflation     0.3742882  0.4289920   0.872   0.3967  
## Reelect       0.0787378  0.0304870   2.583   0.0208 *
## Terms        -0.0064650  0.0154704  -0.418   0.6819  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.002557151)
## 
##     Null deviance: 0.079143  on 22  degrees of freedom
## Residual deviance: 0.038357  on 15  degrees of freedom
## AIC: -63.844
## 
## Number of Fisher Scoring iterations: 2
# model 2; predict Iwin based on CURRENT inflation and CURRENT gdp 
lm.fit2 <- glm(formula = Prctel~Party+Unemployment+CGDP+CInflation+Reelect+Terms, data=data1)
summary(lm.fit2)
## 
## Call:
## glm(formula = Prctel ~ Party + Unemployment + CGDP + CInflation + 
##     Reelect + Terms, data = data1)
## 
## Deviance Residuals: 
##       Min         1Q     Median         3Q        Max  
## -0.075454  -0.035136  -0.004232   0.034763   0.124851  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.4489875  0.0463537   9.686 4.27e-08 ***
## PartyR        0.0300575  0.0241915   1.242    0.232    
## Unemployment -0.2391574  0.3049674  -0.784    0.444    
## CGDP          0.0004809  0.0036937   0.130    0.898    
## CInflation    0.0064146  0.0053357   1.202    0.247    
## Reelect       0.0539566  0.0314378   1.716    0.105    
## Terms         0.0101886  0.0157259   0.648    0.526    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.002863958)
## 
##     Null deviance: 0.079143  on 22  degrees of freedom
## Residual deviance: 0.045823  on 16  degrees of freedom
## AIC: -61.753
## 
## Number of Fisher Scoring iterations: 2
# model 3
#glm.fit <- glm(formula = Iwin~Party+Unemployment+Terms,family = binomial, data=data1)
#summary(glm.fit)
# lower AIC = 8

hoslem.test(data1$Iwin, fitted(glm.fit1))
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data1$Iwin, fitted(glm.fit1)
## X-squared = 7.0964e-11, df = 8, p-value = 1
hoslem.test(data1$Iwin, fitted(glm.fit2))
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  data1$Iwin, fitted(glm.fit2)
## X-squared = 7.0964e-11, df = 8, p-value = 1

Now letโ€™s try to predict using one of these models and see what percentage of the 23 observations is predicted correctly:

pred <- predict(glm.fit2)
pred = ifelse(pred<0,0,1)
correct = sum(pred == data1$Iwin)/length(pred)
correct
## [1] 1

Looks like 100%