# Election Data
# Load the election data set.
# NOTE(review): the path is machine-specific; adjust it if the CSV lives elsewhere.
ed <- read.csv("E:\\EXCELR ASSIGMENTS\\election_data.csv")

# View() opens a data viewer window, so only call it in an interactive session.
if (interactive()) {
  View(ed)
}

# No attach(ed): the model receives its columns through `data = ed`, so
# attaching the data frame to the search path is unnecessary and risks
# masking other objects.

# First attempt: logistic regression with Popularity.Rank as a numeric predictor.
fit1 <- glm(
  Result ~ Year + Amount.Spent + Popularity.Rank,
  data = ed,
  family = "binomial"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# NOTE(review): these two warnings usually point to (quasi-)complete
# separation of the outcome by the predictors; treat the estimates and
# standard errors printed below with caution.
summary(fit1)
##
## Call:
## glm(formula = Result ~ Year + Amount.Spent + Popularity.Rank,
## family = "binomial", data = ed)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.291e-05 -2.110e-08 2.110e-08 2.110e-08 1.829e-05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 63.560 629369.770 0.000 1.000
## Year 4.313 12237.762 0.000 1.000
## Amount.Spent 5.082 209842.916 0.000 1.000
## Popularity.Rank -81.380 122426.442 -0.001 0.999
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1.3460e+01 on 9 degrees of freedom
## Residual deviance: 6.5897e-10 on 6 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 8
##
## Number of Fisher Scoring iterations: 25
# Linear regression cannot be used here because the response (Result) is binary; a logistic regression is fitted instead.
# Fitted probabilities from the first model (fit1).
prob1 <- predict(fit1, type = "response")

# View() opens a data viewer window, so only call it in an interactive session.
if (interactive()) {
  View(prob1)
}

# Logistic regression with Popularity.Rank entered as a categorical
# predictor (one coefficient per rank level beyond the baseline).
logit <- glm(
  Result ~ Year + Amount.Spent + factor(Popularity.Rank),
  family = binomial,
  data = ed
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# NOTE(review): this warning usually signals (quasi-)complete separation;
# the coefficient table printed by summary() should be read with that in mind.
summary(logit)
##
## Call:
## glm(formula = Result ~ Year + Amount.Spent + factor(Popularity.Rank),
## family = binomial, data = ed)
##
## Deviance Residuals:
## 2 3 4 5 6 7
## -1.166e-05 2.110e-08 5.690e-06 -2.110e-08 7.560e-07 -2.110e-08
## 8 9 10 11
## 6.551e-06 1.259e-05 2.440e-06 -9.183e-06
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -218.740 675602.171 0 1
## Year 4.401 17137.652 0 1
## Amount.Spent 5.208 310985.668 0 1
## factor(Popularity.Rank)2 42.172 821079.594 0 1
## factor(Popularity.Rank)3 34.639 375325.352 0 1
## factor(Popularity.Rank)4 -48.761 330224.131 0 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1.3460e+01 on 9 degrees of freedom
## Residual deviance: 4.6057e-10 on 4 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 12
##
## Number of Fisher Scoring iterations: 24
# Odds ratios: exponentiate the fitted log-odds coefficients of `logit`.
exp(coef(logit))
## (Intercept) Year Amount.Spent
## 1.005112e-95 8.156961e+01 1.827071e+02
## factor(Popularity.Rank)2 factor(Popularity.Rank)3 factor(Popularity.Rank)4
## 2.065833e+18 1.105603e+15 6.656103e-22
# Predicted probabilities for every row of `ed`; these feed the
# confusion matrix computed next.
prob <- predict(logit, newdata = ed, type = "response")
prob
## 1 2 3 4 5
## NA 6.792198e-11 1.000000e+00 1.000000e+00 2.220446e-16
## 6 7 8 9 10
## 1.000000e+00 2.220446e-16 1.000000e+00 1.000000e+00 1.000000e+00
## 11
## 4.216854e-11
# Cross-tabulate predicted class (probability > 0.5) against the observed
# Result. Both margins are given fixed levels so the table is always 2 x 2,
# even if every prediction falls on the same side of the threshold;
# otherwise diag() below would pick the wrong cells.
# Rows with a missing prediction or outcome are dropped by table().
confusion <- table(
  factor(prob > 0.5, levels = c(FALSE, TRUE)),
  factor(ed$Result, levels = c(0, 1))
)
confusion
##
## 0 1
## FALSE 4 0
## TRUE 0 6
# Model accuracy: correctly classified cases (the diagonal) divided by
# all classified cases.
Accuracy <- sum(diag(confusion)) / sum(confusion)
Accuracy
## [1] 1
# ROC Curve
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Build the ROCR prediction object from the predicted probabilities and
# the observed outcomes.
rocrpred <- prediction(prob, ed$Result)

# ROC curve: true positive rate against false positive rate.
rocrperf <- performance(rocrpred, measure = "tpr", x.measure = "fpr")
plot(rocrperf, colorize = TRUE, text.adj = c(-0.2, 1.7))

# Area under the ROC curve, so the model can be judged by a number and
# not only by the plot.
auc <- performance(rocrpred, measure = "auc")@y.values[[1]]
auc

# The larger the area under the ROC curve (AUC), the better the logistic regression model separates the two outcome classes.