Logistic Regression

Predict election result

library(car)

## Loading required package: carData

# Load the election data set. The path is machine-specific; point it at the
# local copy of election_data.csv before running.
Data <- read.csv("C:\\data science\\ds\\datasetsandcodesandassignments\\election_data.csv")

# Open the data in the spreadsheet-style viewer for a quick visual check.
View(Data)
# attach(Data) was removed: the visible steps of this script always reach the
# columns through `Data` (data = Data, Data$Result), and attach() only adds a
# copy of the columns to the search path that can mask other objects.

# Fit a binomial logistic regression of Result on every other column.
# NOTE(review): the fit reports fitted probabilities of 0 or 1, which points
# to (quasi-)complete separation, so the coefficients and standard errors are
# not reliable. Election.id is presumably a row identifier rather than a real
# predictor - confirm, and consider dropping it from the formula.
model <- glm(Result ~ ., data = Data, family = "binomial")

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

# Coefficient table, deviances and iteration count of the fitted model.
summary(model)

## 
## Call:
## glm(formula = Result ~ ., family = "binomial", data = Data)
## 
## Deviance Residuals: 
##          1           2           3           4           5           6  
## -1.007e-05   2.110e-08   2.110e-08  -6.461e-06   2.110e-08  -2.110e-08  
##          7           8           9          10  
##  2.110e-08   1.445e-05   7.061e-06  -9.419e-06  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)
## (Intercept)      1.557e+02  5.375e+05   0.000        1
## Election.id      4.709e-02  2.247e+02   0.000        1
## Year             3.056e+00  1.038e+04   0.000        1
## Amount.Spent    -1.362e+01  9.118e+04   0.000        1
## Popularity.Rank -7.702e+01  1.319e+05  -0.001        1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.3460e+01  on 9  degrees of freedom
## Residual deviance: 4.9054e-10  on 5  degrees of freedom
## AIC: 10
## 
## Number of Fisher Scoring iterations: 25

# Fitted probabilities on the response scale for the rows used in the fit
# (no newdata is supplied).
model_prob <- predict(model, type = "response")

# NOTE(review): this prints the same model summary a second time and
# model_prob is not used by the visible code below; possibly
# summary(model_prob) was intended - confirm.
summary(model)

## 
## Call:
## glm(formula = Result ~ ., family = "binomial", data = Data)
## 
## Deviance Residuals: 
##          1           2           3           4           5           6  
## -1.007e-05   2.110e-08   2.110e-08  -6.461e-06   2.110e-08  -2.110e-08  
##          7           8           9          10  
##  2.110e-08   1.445e-05   7.061e-06  -9.419e-06  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)
## (Intercept)      1.557e+02  5.375e+05   0.000        1
## Election.id      4.709e-02  2.247e+02   0.000        1
## Year             3.056e+00  1.038e+04   0.000        1
## Amount.Spent    -1.362e+01  9.118e+04   0.000        1
## Popularity.Rank -7.702e+01  1.319e+05  -0.001        1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.3460e+01  on 9  degrees of freedom
## Residual deviance: 4.9054e-10  on 5  degrees of freedom
## AIC: 10
## 
## Number of Fisher Scoring iterations: 25

# Odds ratios: exponentiate the fitted coefficients (log-odds scale) so each
# value is the multiplicative change in the odds per one-unit increase of the
# corresponding predictor.
exp(coef(model))

##     (Intercept)     Election.id            Year    Amount.Spent 
##    4.267031e+67    1.048218e+00    2.124163e+01    1.213536e-06 
## Popularity.Rank 
##    3.547573e-34

# Confusion matrix table
# Predicted probabilities (response scale) for every row of Data; printed so
# they can be inspected before thresholding.
prob <- predict(model, newdata = Data, type = "response")
prob

##            1            2            3            4            5 
## 5.073345e-11 1.000000e+00 1.000000e+00 2.087538e-11 1.000000e+00 
##            6            7            8            9           10 
## 2.220446e-16 1.000000e+00 1.000000e+00 1.000000e+00 4.436037e-11

# Cross-tabulate the predicted class (probability above 0.5) against the
# observed Result. The predicted side is a factor with both levels fixed so
# the table always has a FALSE and a TRUE row, even when every prediction
# falls on the same side of the threshold; without that a row would be
# missing and diag() on the table would pick the wrong cells.
# The factor is built inline (not via a named variable) so the printed table
# keeps its unnamed dimensions.
confusion <- table(factor(prob > 0.5, levels = c(FALSE, TRUE)), Data$Result)
confusion

##        
##         0 1
##   FALSE 4 0
##   TRUE  0 6

# Model Accuracy
# Share of correctly classified rows: the diagonal of the confusion table
# (predicted class agrees with observed class) over the total count. The
# diagonal is summed first and divided once, rather than dividing each cell
# before summing.
Accuracy <- sum(diag(confusion)) / sum(confusion)
Accuracy

## [1] 1

# ROC Curve 
library(ROCR)

## Loading required package: gplots

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:stats':
## 
##     lowess

# Pair the predicted probabilities with the observed labels for ROCR.
rocrpred <- prediction(prob, Data$Result)
# True-positive rate against false-positive rate, i.e. the ROC curve.
rocrperf <- performance(rocrpred, "tpr", "fpr")
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
plot(rocrperf, colorize = TRUE, text.adj = c(-0.2, 1.7))