# Logistic Regression

# Election Data

# Load the election data set.
# NOTE: the absolute Windows path below is specific to the author's machine;
# adjust it before running the script elsewhere.
ed <- read.csv("E:\\EXCELR ASSIGMENTS\\election_data.csv")

# Open the spreadsheet-style viewer only in interactive sessions, so the
# script can also be run in batch mode where no viewer is available.
if (interactive()) {
  View(ed)
}

# attach(ed) is intentionally omitted: the model calls pass `data = ed` and
# the remaining references use `ed$...`, so nothing relies on attached columns
# and the search path is left untouched.


# First logistic model: Result regressed on Year, Amount.Spent and
# Popularity.Rank, each entered as a single term.
fit1 <- glm(
  Result ~ Year + Amount.Spent + Popularity.Rank,
  family = "binomial",
  data = ed
)

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

# NOTE(review): the two warnings above, together with the very large standard
# errors in the summary, suggest the outcome is (quasi-)completely separated
# by the predictors; treat the coefficient table with caution.
summary(fit1)

## 
## Call:
## glm(formula = Result ~ Year + Amount.Spent + Popularity.Rank, 
##     family = "binomial", data = ed)
## 
## Deviance Residuals: 
##        Min          1Q      Median          3Q         Max  
## -1.291e-05  -2.110e-08   2.110e-08   2.110e-08   1.829e-05  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)
## (Intercept)         63.560 629369.770   0.000    1.000
## Year                 4.313  12237.762   0.000    1.000
## Amount.Spent         5.082 209842.916   0.000    1.000
## Popularity.Rank    -81.380 122426.442  -0.001    0.999
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.3460e+01  on 9  degrees of freedom
## Residual deviance: 6.5897e-10  on 6  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 8
## 
## Number of Fisher Scoring iterations: 25

# Result takes the values 0/1, so a linear regression technique cannot be
# employed; logistic regression is used instead.
# Fitted probabilities of the first model, on the response (probability) scale.
prob1 <- predict(fit1, type = "response")

# Inspect the fitted probabilities in the viewer only when a session is
# interactive, so the script can also be run in batch mode.
if (interactive()) {
  View(prob1)
}
# Second logistic model: same predictors, but Popularity.Rank is wrapped in
# factor() so each rank level gets its own coefficient.
logit <- glm(
  Result ~ Year + Amount.Spent + factor(Popularity.Rank),
  family = binomial,
  data = ed
)

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

# Coefficient table, deviances and AIC for the factor-coded model.
summary(logit)

## 
## Call:
## glm(formula = Result ~ Year + Amount.Spent + factor(Popularity.Rank), 
##     family = binomial, data = ed)
## 
## Deviance Residuals: 
##          2           3           4           5           6           7  
## -1.166e-05   2.110e-08   5.690e-06  -2.110e-08   7.560e-07  -2.110e-08  
##          8           9          10          11  
##  6.551e-06   1.259e-05   2.440e-06  -9.183e-06  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)                -218.740 675602.171       0        1
## Year                          4.401  17137.652       0        1
## Amount.Spent                  5.208 310985.668       0        1
## factor(Popularity.Rank)2     42.172 821079.594       0        1
## factor(Popularity.Rank)3     34.639 375325.352       0        1
## factor(Popularity.Rank)4    -48.761 330224.131       0        1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.3460e+01  on 9  degrees of freedom
## Residual deviance: 4.6057e-10  on 4  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 12
## 
## Number of Fisher Scoring iterations: 24

# Odds ratios: exponentiate the fitted log-odds coefficients of `logit`.
# NOTE(review): the printed values span roughly 1e-95 to 1e+18, which looks
# like a symptom of the separation warning raised when `logit` was fitted
# rather than a meaningful effect size -- confirm before interpreting them.
exp(coef(logit))

##              (Intercept)                     Year             Amount.Spent 
##             1.005112e-95             8.156961e+01             1.827071e+02 
## factor(Popularity.Rank)2 factor(Popularity.Rank)3 factor(Popularity.Rank)4 
##             2.065833e+18             1.105603e+15             6.656103e-22

# Confusion matrix table
# Predicted probabilities for every row of `ed`. Supplying the data frame as
# `newdata` keeps one entry per row (NA where a row cannot be scored), so the
# vector lines up element-for-element with ed$Result.
prob <- predict(logit, newdata = ed, type = "response")
prob

##            1            2            3            4            5 
##           NA 6.792198e-11 1.000000e+00 1.000000e+00 2.220446e-16 
##            6            7            8            9           10 
## 1.000000e+00 2.220446e-16 1.000000e+00 1.000000e+00 1.000000e+00 
##           11 
## 4.216854e-11

# Cross-tabulate the 0.5-threshold classification (rows: FALSE/TRUE) against
# the observed Result (columns).
confusion <- table(prob > 0.5, ed$Result)
confusion

##        
##         0 1
##   FALSE 4 0
##   TRUE  0 6

# Model accuracy: correctly classified observations (the diagonal of the
# confusion matrix) as a share of all classified observations.
# The diagonal is summed first and divided once; the closing parenthesis was
# previously misplaced so that each cell was divided before summing.
Accuracy <- sum(diag(confusion)) / sum(confusion)
Accuracy

## [1] 1

# ROC Curve 
library(ROCR)

## Loading required package: gplots

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:stats':
## 
##     lowess

# Build the ROCR prediction object from the predicted probabilities and the
# observed outcomes, then derive the true-positive rate against the
# false-positive rate for the ROC curve.
rocrpred <- prediction(prob, ed$Result)
rocrperf <- performance(rocrpred, "tpr", "fpr")
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
plot(rocrperf, colorize = TRUE, text.adj = c(-0.2, 1.7))

# The larger the area under the ROC curve, the better the logistic regression model obtained.