# LogisticRegression

# ---- Load and prepare the data ----------------------------------------------
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists.
Data <- read.csv("D:\\DataScience\\Assignments\\LogisticRegression\\election_data.csv")

# Drop the first row of the file.
# NOTE(review): presumably an empty/placeholder record - confirm against the CSV.
Data <- Data[-1, ]

# Standardise the predictors only. The response `Result` must keep its
# original 0/1 coding, otherwise a binomial (logistic) fit is not possible.
predictor_cols <- setdiff(colnames(Data), "Result")
Data[predictor_cols] <- lapply(
  Data[predictor_cols],
  function(column) as.numeric(scale(column))
)

# Fail early with a clear message if the response is not binary.
# NOTE(review): assumes `Result` is coded 0/1 in the CSV - confirm.
stopifnot(all(na.omit(Data$Result) %in% c(0, 1)))

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(Data)
}

class(Data)

## [1] "data.frame"

colnames(Data)

## [1] "Election.id"     "Result"          "Year"            "Amount.Spent"
## [5] "Popularity.Rank"

# ---- Full logistic model ----------------------------------------------------
# attach(Data) is deliberately not used: every model call passes `data =`,
# and attaching the same frame twice only produces masking messages.
# `family = binomial` makes this a logistic regression; without it glm()
# falls back to a gaussian fit.
Model1 <- glm(
  Result ~ Election.id + Year + Amount.Spent + Popularity.Rank,
  data = Data,
  family = binomial
)

# NOTE: the "##" output recorded further down this file came from the earlier
# gaussian fit on a standardised response; re-run the script to refresh it.
summary(Model1)

## 
## Call:
## glm(formula = Result ~ Election.id + Year + Amount.Spent + Popularity.Rank, 
##     data = Data)
## 
## Deviance Residuals: 
##        2         3         4         5         6         7         8  
## -0.68295  -0.00784  -0.27360  -0.20117  -0.30670  -0.19348   0.19242  
##        9        10        11  
##  0.94553   1.10916  -0.58136  
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)     -2.585e-16  2.535e-01   0.000    1.000
## Election.id      3.688e-02  3.028e-01   0.122    0.908
## Year             1.221e-01  3.373e-01   0.362    0.732
## Amount.Spent     1.491e-01  2.756e-01   0.541    0.612
## Popularity.Rank -6.968e-01  3.606e-01  -1.932    0.111
## 
## (Dispersion parameter for gaussian family taken to be 0.642517)
## 
##     Null deviance: 9.0000  on 9  degrees of freedom
## Residual deviance: 3.2126  on 5  degrees of freedom
## AIC: 29.024
## 
## Number of Fisher Scoring iterations: 2

library(MASS)

# Stepwise selection by AIC starting from the full model. With no `scope`
# given, terms are only dropped; the trace and the selected model are printed.
stepAIC(object = Model1)

## Start:  AIC=29.02
## Result ~ Election.id + Year + Amount.Spent + Popularity.Rank
## 
##                   Df Deviance    AIC
## - Election.id      1   3.2221 27.053
## - Year             1   3.2967 27.282
## - Amount.Spent     1   3.4006 27.592
## <none>                 3.2126 29.024
## - Popularity.Rank  1   5.6119 32.602
## 
## Step:  AIC=27.05
## Result ~ Year + Amount.Spent + Popularity.Rank
## 
##                   Df Deviance    AIC
## - Year             1   3.3432 25.422
## - Amount.Spent     1   3.4260 25.667
## <none>                 3.2221 27.053
## - Popularity.Rank  1   6.0831 31.408
## 
## Step:  AIC=25.42
## Result ~ Amount.Spent + Popularity.Rank
## 
##                   Df Deviance    AIC
## - Amount.Spent     1   3.5331 23.975
## <none>                 3.3432 25.422
## - Popularity.Rank  1   8.2069 32.403
## 
## Step:  AIC=23.97
## Result ~ Popularity.Rank
## 
##                   Df Deviance    AIC
## <none>                 3.5331 23.975
## - Popularity.Rank  1   9.0000 31.325

## 
## Call:  glm(formula = Result ~ Popularity.Rank, data = Data)
## 
## Coefficients:
##     (Intercept)  Popularity.Rank  
##      -1.660e-16       -7.794e-01  
## 
## Degrees of Freedom: 9 Total (i.e. Null);  8 Residual
## Null Deviance:       9 
## Residual Deviance: 3.533     AIC: 23.97

# Per-observation influence diagnostics for the full model: DFBETAS, DFFITS,
# covariance ratio, Cook's distance and hat values, with influential cases
# flagged in the `inf` column.
influence.measures(model = Model1)

## Influence measures of
##   glm(formula = Result ~ Election.id + Year + Amount.Spent + Popularity.Rank,      data = Data) :
## 
##      dfb.1_ dfb.Elc. dfb.Year dfb.Am.S dfb.Pp.R   dffit  cov.r   cook.d
## 2  -0.59125  0.36966  0.90557   0.2176  0.26484 -1.3397 1.0664 3.15e-01
## 3  -0.00747  0.00572 -0.00583  -0.0158 -0.00323 -0.0187 8.2342 8.77e-05
## 4  -0.18077  0.08672 -0.09523   0.1425  0.15905 -0.3852 4.4952 3.55e-02
## 5  -0.20472 -0.34787  0.10674   0.2671  0.00423 -0.5207 7.2074 6.54e-02
## 6  -0.20956 -0.05910 -0.08790   0.2005  0.24229 -0.4537 4.3265 4.86e-02
## 7  -0.13250  0.17732  0.02902  -0.0475 -0.15494 -0.2899 5.2299 2.05e-02
## 8   0.23448  0.38159 -0.37873   0.2433 -0.35988  0.6224 8.4671 9.31e-02
## 9   0.47095  0.25327 -0.11553   0.0325 -0.02274  0.5550 0.5034 5.21e-02
## 10  0.84876 -0.88428  0.51562  -0.6499  0.55738  1.4926 0.0776 2.48e-01
## 11 -0.71504 -0.07496 -1.28581  -0.4336 -1.30760 -1.8310 1.4313 5.82e-01
##      hat inf
## 2  0.513    
## 3  0.629   *
## 4  0.454   *
## 5  0.647   *
## 6  0.469   *
## 7  0.479   *
## 8  0.705   *
## 9  0.139    
## 10 0.309    
## 11 0.656   *

library(car)

## Warning: package 'car' was built under R version 3.5.1

## Loading required package: carData

# Index plots of the influence diagnostics for the full model.
influenceIndexPlot(model = Model1)

# Influence plot for the same model; it also prints the studentised residual,
# hat value and Cook's distance of the observations it labels.
influencePlot(model = Model1)

##       StudRes       Hat      CookD
## 2  -1.3042425 0.5134051 0.31481435
## 8   0.4029983 0.7046130 0.09307245
## 9   1.3819885 0.1388712 0.05211628
## 10  2.2307463 0.3092427 0.24818776
## 11 -1.3267427 0.6557238 0.58203137

# ---- Refit without the observations judged influential -----------------------
# The influence diagnostics label observations by *row name* ("2".."11"), so
# the rows are excluded by name. Negative positional indices do not line up
# with those labels: position 11 does not exist in a 10-row frame, and
# positions 7, 8 and 10 are the rows named 8, 9 and 11.
influential_rows <- c("7", "8", "10", "11")
model2_data <- Data[!(rownames(Data) %in% influential_rows), ]

# A binomial fit needs a 0/1 response. Recode defensively: `Result > 0` picks
# out the positive class whether `Result` is still 0/1 or was centred and
# scaled together with the predictors.
model2_data$Result <- as.integer(model2_data$Result > 0)

# exp() of the standardised Election.id and Year are used as predictors.
# `family = binomial` makes this a logistic regression rather than the
# default gaussian fit.
Model2 <- glm(
  Result ~ exp(Election.id) + exp(Year) + Amount.Spent + Popularity.Rank,
  data = model2_data,
  family = binomial
)

# NOTE: the "##" output recorded below came from the earlier gaussian fit on
# a different row subset; re-run the script to refresh it.
summary(Model2)

## 
## Call:
## glm(formula = Result ~ exp(Election.id) + exp(Year) + Amount.Spent + 
##     Popularity.Rank, data = Data[-c(7, 8, 10, 11), ])
## 
## Deviance Residuals: 
##        2         3         4         5         6         7        10  
## -0.31785   0.28563  -0.07988   0.00334  -0.35159  -0.58066   1.04101  
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       -0.9394     1.2740  -0.737    0.538
## exp(Election.id)  -0.1352     0.2854  -0.474    0.682
## exp(Year)          0.7119     0.9868   0.721    0.546
## Amount.Spent       0.1089     0.3939   0.276    0.808
## Popularity.Rank    0.1757     1.3031   0.135    0.905
## 
## (Dispersion parameter for gaussian family taken to be 0.8667395)
## 
##     Null deviance: 6.4286  on 6  degrees of freedom
## Residual deviance: 1.7335  on 2  degrees of freedom
## AIC: 22.095
## 
## Number of Fisher Scoring iterations: 2

# Predictions from Model2 for every row of Data, including the rows that were
# left out when fitting it. `newdata` is named so it cannot be mistaken for
# another positional argument.
prob <- predict(Model2, newdata = Data, type = "response")

# Kept as a one-column data frame (column `prob`) next to the inputs.
prob <- as.data.frame(prob)
final <- cbind(prob, Data)

# Confusion table: predicted class (cut-off 0.5) against observed Result.
# Forcing both FALSE and TRUE levels keeps the table 2 x 2 even when every
# prediction falls on one side of the cut-off, so diag() below always refers
# to the correctly classified cells.
# NOTE(review): 0.5 is only a meaningful cut-off if Model2 predicts
# probabilities, i.e. is a binomial fit.
predicted_positive <- factor(prob$prob > 0.5, levels = c(FALSE, TRUE))
confusion <- table(predicted_positive, Data$Result, dnn = c("predicted", "observed"))
table(prob > 0.5)

## 
## FALSE  TRUE 
##     7     3

# Share of observations on the diagonal of the confusion table.
Accuracy <- sum(diag(confusion)) / sum(confusion)
Accuracy

## [1] 0.5

library(ROCR)

## Warning: package 'ROCR' was built under R version 3.5.1

## Loading required package: gplots

## Warning: package 'gplots' was built under R version 3.5.1

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:stats':
## 
##     lowess

# ROC curve for the Model2 predictions: true-positive rate against
# false-positive rate, coloured by cut-off.
rocrpred <- prediction(prob, Data$Result)
rocrperf <- performance(rocrpred, measure = "tpr", x.measure = "fpr")
# TRUE rather than T: `T` is an ordinary variable and can be reassigned.
plot(rocrperf, colorize = TRUE, text.adj = c(-0.2, 1.7))

# LogisticRegression

# Pratap

# July 8, 2018