Logistic Regression

Election dataset

Assignment 17

mydata <- read.csv("C:\\Users\\RISHI RAHUL\\Desktop\\DS\\4 Logistic\\Assignment\\election_data.csv")

sum(is.na(mydata))

## [1] 5

mydata <- na.omit(mydata)

mydata <- mydata[-1]
attach(mydata)

summary(mydata)

##      Result         Year        Amount.Spent   Popularity.Rank
##  Min.   :0.0   Min.   :32.00   Min.   :2.930   Min.   :1.00   
##  1st Qu.:0.0   1st Qu.:39.25   1st Qu.:3.618   1st Qu.:2.00   
##  Median :1.0   Median :43.00   Median :4.005   Median :3.00   
##  Mean   :0.6   Mean   :43.30   Mean   :4.229   Mean   :2.70   
##  3rd Qu.:1.0   3rd Qu.:49.50   3rd Qu.:4.470   3rd Qu.:3.75   
##  Max.   :1.0   Max.   :52.00   Max.   :6.320   Max.   :4.00

cor(mydata)

##                     Result        Year Amount.Spent Popularity.Rank
## Result           1.0000000  0.50422535   0.29685038      -0.7793831
## Year             0.5042253  1.00000000   0.06568657      -0.5281382
## Amount.Spent     0.2968504  0.06568657   1.00000000      -0.1982179
## Popularity.Rank -0.7793831 -0.52813819  -0.19821792       1.0000000

# Logistic Regression Model
model <- glm(Result~ Year + Amount.Spent + factor(Popularity.Rank), mydata , family = "binomial")

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(model)

## 
## Call:
## glm(formula = Result ~ Year + Amount.Spent + factor(Popularity.Rank), 
##     family = "binomial", data = mydata)
## 
## Deviance Residuals: 
##          2           3           4           5           6           7  
## -1.166e-05   2.110e-08   5.690e-06  -2.110e-08   7.560e-07  -2.110e-08  
##          8           9          10          11  
##  6.551e-06   1.259e-05   2.440e-06  -9.183e-06  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)                -218.740 675602.171       0        1
## Year                          4.401  17137.652       0        1
## Amount.Spent                  5.208 310985.668       0        1
## factor(Popularity.Rank)2     42.172 821079.594       0        1
## factor(Popularity.Rank)3     34.639 375325.352       0        1
## factor(Popularity.Rank)4    -48.761 330224.131       0        1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.3460e+01  on 9  degrees of freedom
## Residual deviance: 4.6057e-10  on 4  degrees of freedom
## AIC: 12
## 
## Number of Fisher Scoring iterations: 24

library(car)

## Warning: package 'car' was built under R version 3.5.1

## Loading required package: carData

library(MASS)

stepAIC(model)

## Start:  AIC=12
## Result ~ Year + Amount.Spent + factor(Popularity.Rank)

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

##                           Df Deviance    AIC
## - Amount.Spent             1   0.0000 10.000
## <none>                         0.0000 12.000
## - Year                     1   3.8177 13.818
## - factor(Popularity.Rank)  3   9.8091 15.809

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## 
## Step:  AIC=10
## Result ~ Year + factor(Popularity.Rank)
## 
##                           Df Deviance    AIC
## <none>                         0.0000 10.000
## - Year                     1   3.8191 11.819
## - factor(Popularity.Rank)  3  10.7150 14.715

## 
## Call:  glm(formula = Result ~ Year + factor(Popularity.Rank), family = "binomial", 
##     data = mydata)
## 
## Coefficients:
##              (Intercept)                      Year  
##                 -212.991                     4.663  
## factor(Popularity.Rank)2  factor(Popularity.Rank)3  
##                   55.654                    40.360  
## factor(Popularity.Rank)4  
##                  -44.050  
## 
## Degrees of Freedom: 9 Total (i.e. Null);  5 Residual
## Null Deviance:       13.46 
## Residual Deviance: 4.632e-10     AIC: 10

avPlots(model)

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

# Prediction Model
prob <- predict(model, mydata, type = "response")
prob <- as.data.frame(prob)

final <- cbind(mydata, prob)

confusion <- table(prob>0.5, Result)
confusion

##        Result
##         0 1
##   FALSE 4 0
##   TRUE  0 6

accuracy <- sum(diag(confusion)/sum(confusion))
accuracy

## [1] 1

library(ROCR)

## Warning: package 'ROCR' was built under R version 3.5.1

## Loading required package: gplots

## Warning: package 'gplots' was built under R version 3.5.1

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:stats':
## 
##     lowess

rocrpred <- prediction(prob, Result)
rocrperf <- performance(rocrpred, 'tpr', 'fpr')
str(rocrperf)

## Formal class 'performance' [package "ROCR"] with 6 slots
##   ..@ x.name      : chr "False positive rate"
##   ..@ y.name      : chr "True positive rate"
##   ..@ alpha.name  : chr "Cutoff"
##   ..@ x.values    :List of 1
##   .. ..$ : num [1:10] 0 0 0 0 0 0 0 0.25 0.5 1
##   ..@ y.values    :List of 1
##   .. ..$ : num [1:10] 0 0.167 0.333 0.5 0.667 ...
##   ..@ alpha.values:List of 1
##   .. ..$ : num [1:10] Inf 1 1 1 1 ...

plot(rocrperf,colorize=T,text.adj=c(-0.2,1.7))