# Load the election data set.
# NOTE(review): absolute, machine-specific Windows path; adjust before running
# elsewhere.
Data <- read.csv("D:\\DataScience\\Assignments\\LogisticRegression\\election_data.csv")

# Drop the first row, then centre and scale every remaining column.
# scale() returns a numeric matrix rather than a data frame.
# NOTE(review): presumably row 1 is dropped because it is blank/NA; confirm
# against the CSV.
# NOTE(review): Result (the response) is standardized together with the
# predictors, so it no longer holds its original codes; confirm this is intended.
Data <- scale(Data[-1, ])

# View() opens a GUI data viewer, so only call it in an interactive session;
# this lets the script also run unattended (Rscript, knitr, batch jobs).
if (interactive()) {
  View(Data)
}
class(Data)
## [1] "matrix"
colnames(Data)
## [1] "Election.id" "Result" "Year" "Amount.Spent"
## [5] "Popularity.Rank"
# Convert the scaled matrix back to a data frame so that columns can be
# referenced by name in model formulas and through `$`.
Data <- as.data.frame(Data)

# No attach(Data): the models in this script receive `data = Data` explicitly
# and the response is read as Data$Result, so the columns do not need to be
# placed on the search path (attaching twice only produced masking messages).
# Fit Result on the four other columns of Data.
# No `family` argument is given, so glm() uses its default; the summary
# recorded below reports "gaussian family", i.e. this is a least-squares
# linear fit on the scaled data rather than a logistic regression.
# NOTE(review): the data path suggests a logistic model was intended;
# family = binomial would require Result to keep a 0/1 coding (it is
# standardized upstream) -- confirm before changing.
Model1 <- glm(Result ~ Election.id+Year+Amount.Spent+Popularity.Rank,data = Data)
# Print coefficient estimates, deviances and AIC for the full model.
summary(Model1)
##
## Call:
## glm(formula = Result ~ Election.id + Year + Amount.Spent + Popularity.Rank,
## data = Data)
##
## Deviance Residuals:
## 2 3 4 5 6 7 8
## -0.68295 -0.00784 -0.27360 -0.20117 -0.30670 -0.19348 0.19242
## 9 10 11
## 0.94553 1.10916 -0.58136
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.585e-16 2.535e-01 0.000 1.000
## Election.id 3.688e-02 3.028e-01 0.122 0.908
## Year 1.221e-01 3.373e-01 0.362 0.732
## Amount.Spent 1.491e-01 2.756e-01 0.541 0.612
## Popularity.Rank -6.968e-01 3.606e-01 -1.932 0.111
##
## (Dispersion parameter for gaussian family taken to be 0.642517)
##
## Null deviance: 9.0000 on 9 degrees of freedom
## Residual deviance: 3.2126 on 5 degrees of freedom
## AIC: 29.024
##
## Number of Fisher Scoring iterations: 2
# MASS provides stepAIC().
library(MASS)
# AIC-based stepwise selection starting from Model1. The selected model is
# only printed, not stored; the recorded trace below drops one term per step
# and ends at Result ~ Popularity.Rank.
stepAIC(Model1)
## Start: AIC=29.02
## Result ~ Election.id + Year + Amount.Spent + Popularity.Rank
##
## Df Deviance AIC
## - Election.id 1 3.2221 27.053
## - Year 1 3.2967 27.282
## - Amount.Spent 1 3.4006 27.592
## <none> 3.2126 29.024
## - Popularity.Rank 1 5.6119 32.602
##
## Step: AIC=27.05
## Result ~ Year + Amount.Spent + Popularity.Rank
##
## Df Deviance AIC
## - Year 1 3.3432 25.422
## - Amount.Spent 1 3.4260 25.667
## <none> 3.2221 27.053
## - Popularity.Rank 1 6.0831 31.408
##
## Step: AIC=25.42
## Result ~ Amount.Spent + Popularity.Rank
##
## Df Deviance AIC
## - Amount.Spent 1 3.5331 23.975
## <none> 3.3432 25.422
## - Popularity.Rank 1 8.2069 32.403
##
## Step: AIC=23.97
## Result ~ Popularity.Rank
##
## Df Deviance AIC
## <none> 3.5331 23.975
## - Popularity.Rank 1 9.0000 31.325
##
## Call: glm(formula = Result ~ Popularity.Rank, data = Data)
##
## Coefficients:
## (Intercept) Popularity.Rank
## -1.660e-16 -7.794e-01
##
## Degrees of Freedom: 9 Total (i.e. Null); 8 Residual
## Null Deviance: 9
## Residual Deviance: 3.533 AIC: 23.97
# Per-observation influence diagnostics for Model1. The recorded table has
# one dfb.* column per coefficient plus dffit, cov.r, cook.d and hat; rows
# marked "*" in the `inf` column are the ones flagged as influential.
influence.measures(Model1)
## Influence measures of
## glm(formula = Result ~ Election.id + Year + Amount.Spent + Popularity.Rank, data = Data) :
##
## dfb.1_ dfb.Elc. dfb.Year dfb.Am.S dfb.Pp.R dffit cov.r cook.d
## 2 -0.59125 0.36966 0.90557 0.2176 0.26484 -1.3397 1.0664 3.15e-01
## 3 -0.00747 0.00572 -0.00583 -0.0158 -0.00323 -0.0187 8.2342 8.77e-05
## 4 -0.18077 0.08672 -0.09523 0.1425 0.15905 -0.3852 4.4952 3.55e-02
## 5 -0.20472 -0.34787 0.10674 0.2671 0.00423 -0.5207 7.2074 6.54e-02
## 6 -0.20956 -0.05910 -0.08790 0.2005 0.24229 -0.4537 4.3265 4.86e-02
## 7 -0.13250 0.17732 0.02902 -0.0475 -0.15494 -0.2899 5.2299 2.05e-02
## 8 0.23448 0.38159 -0.37873 0.2433 -0.35988 0.6224 8.4671 9.31e-02
## 9 0.47095 0.25327 -0.11553 0.0325 -0.02274 0.5550 0.5034 5.21e-02
## 10 0.84876 -0.88428 0.51562 -0.6499 0.55738 1.4926 0.0776 2.48e-01
## 11 -0.71504 -0.07496 -1.28581 -0.4336 -1.30760 -1.8310 1.4313 5.82e-01
## hat inf
## 2 0.513
## 3 0.629 *
## 4 0.454 *
## 5 0.647 *
## 6 0.469 *
## 7 0.479 *
## 8 0.705 *
## 9 0.139
## 10 0.309
## 11 0.656 *
library(car)
## Warning: package 'car' was built under R version 3.5.1
## Loading required package: carData
# Graphical influence diagnostics for Model1 from the car package.
influenceIndexPlot(Model1)

# Besides drawing the plot, influencePlot() prints the StudRes / Hat / CookD
# values of the observations it singles out (recorded below).
influencePlot(Model1)

## StudRes Hat CookD
## 2 -1.3042425 0.5134051 0.31481435
## 8 0.4029983 0.7046130 0.09307245
## 9 1.3819885 0.1388712 0.05211628
## 10 2.2307463 0.3092427 0.24818776
## 11 -1.3267427 0.6557238 0.58203137
# Refit with exp() applied to Election.id and Year, on a subset of the rows.
# As with Model1 no `family` is given; the recorded summary reports a
# gaussian fit.
# The negative index is positional, not by row label. Data has 10 rows
# (labelled 2-11 in the recorded output), so position 11 does not exist and
# is silently ignored; the recorded residuals (labels 2 3 4 5 6 7 10) show
# that the rows actually removed are those labelled 8, 9 and 11.
# NOTE(review): if the intent was to drop the observations labelled
# 7, 8, 10 and 11 from the influence diagnostics, select by rownames(Data)
# instead -- confirm.
Model2 <- glm(Result ~ exp(Election.id)+exp(Year)+Amount.Spent+Popularity.Rank,data = Data[-c(7,8,10,11),])
# Print coefficient estimates, deviances and AIC for the refitted model.
summary(Model2)
##
## Call:
## glm(formula = Result ~ exp(Election.id) + exp(Year) + Amount.Spent +
## Popularity.Rank, data = Data[-c(7, 8, 10, 11), ])
##
## Deviance Residuals:
## 2 3 4 5 6 7 10
## -0.31785 0.28563 -0.07988 0.00334 -0.35159 -0.58066 1.04101
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.9394 1.2740 -0.737 0.538
## exp(Election.id) -0.1352 0.2854 -0.474 0.682
## exp(Year) 0.7119 0.9868 0.721 0.546
## Amount.Spent 0.1089 0.3939 0.276 0.808
## Popularity.Rank 0.1757 1.3031 0.135 0.905
##
## (Dispersion parameter for gaussian family taken to be 0.8667395)
##
## Null deviance: 6.4286 on 6 degrees of freedom
## Residual deviance: 1.7335 on 2 degrees of freedom
## AIC: 22.095
##
## Number of Fisher Scoring iterations: 2
# Predict on every row of Data (including the rows left out when fitting
# Model2). `prob` is kept as a one-column data frame because later code
# passes it on in that form.
prob <- predict(Model2, newdata = Data, type = "response")
prob <- as.data.frame(prob)
final <- cbind(prob, Data)

# Result was standardized together with the predictors, so its two classes
# are not coded 0/1 and the model's predictions live on that same scale.
# A fixed cutoff of 0.5 is therefore not the boundary between the classes.
# The midpoint between the two class codes is: it equals 0.5 when Result is
# coded 0/1 and is the linearly equivalent value after scaling.
# Assumes Result takes exactly two distinct values -- TODO confirm.
cutoff <- mean(range(Data$Result))
predicted_win <- prob$prob > cutoff
actual_win <- Data$Result > cutoff

# Fix both factors to the levels FALSE/TRUE so the table is always 2 x 2 and
# its diagonal is always the count of correct predictions, even if one class
# is never predicted.
confusion <- table(
  Predicted = factor(predicted_win, levels = c(FALSE, TRUE)),
  Actual = factor(actual_win, levels = c(FALSE, TRUE))
)
table(predicted_win)

# Share of observations whose predicted class matches the observed class.
Accuracy <- sum(diag(confusion)) / sum(confusion)
Accuracy
library(ROCR)
## Warning: package 'ROCR' was built under R version 3.5.1
## Loading required package: gplots
## Warning: package 'gplots' was built under R version 3.5.1
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# ROC curve for the Model2 predictions against the observed Result.
# prediction() pairs the scores with the class labels; performance() then
# derives the true-positive rate versus the false-positive rate.
rocrpred <- prediction(prob, Data$Result)
rocrperf <- performance(rocrpred, "tpr", "fpr")
# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
plot(rocrperf, colorize = TRUE, text.adj = c(-0.2, 1.7))
