Assignment 17
mydata <- read.csv("C:\\Users\\RISHI RAHUL\\Desktop\\DS\\4 Logistic\\Assignment\\election_data.csv")
sum(is.na(mydata))
## [1] 5
mydata <- na.omit(mydata)
mydata <- mydata[-1]
attach(mydata)
summary(mydata)
## Result Year Amount.Spent Popularity.Rank
## Min. :0.0 Min. :32.00 Min. :2.930 Min. :1.00
## 1st Qu.:0.0 1st Qu.:39.25 1st Qu.:3.618 1st Qu.:2.00
## Median :1.0 Median :43.00 Median :4.005 Median :3.00
## Mean :0.6 Mean :43.30 Mean :4.229 Mean :2.70
## 3rd Qu.:1.0 3rd Qu.:49.50 3rd Qu.:4.470 3rd Qu.:3.75
## Max. :1.0 Max. :52.00 Max. :6.320 Max. :4.00
cor(mydata)
## Result Year Amount.Spent Popularity.Rank
## Result 1.0000000 0.50422535 0.29685038 -0.7793831
## Year 0.5042253 1.00000000 0.06568657 -0.5281382
## Amount.Spent 0.2968504 0.06568657 1.00000000 -0.1982179
## Popularity.Rank -0.7793831 -0.52813819 -0.19821792 1.0000000
# Logistic Regression Model
model <- glm(Result~ Year + Amount.Spent + factor(Popularity.Rank), mydata , family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(model)
##
## Call:
## glm(formula = Result ~ Year + Amount.Spent + factor(Popularity.Rank),
## family = "binomial", data = mydata)
##
## Deviance Residuals:
## 2 3 4 5 6 7
## -1.166e-05 2.110e-08 5.690e-06 -2.110e-08 7.560e-07 -2.110e-08
## 8 9 10 11
## 6.551e-06 1.259e-05 2.440e-06 -9.183e-06
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -218.740 675602.171 0 1
## Year 4.401 17137.652 0 1
## Amount.Spent 5.208 310985.668 0 1
## factor(Popularity.Rank)2 42.172 821079.594 0 1
## factor(Popularity.Rank)3 34.639 375325.352 0 1
## factor(Popularity.Rank)4 -48.761 330224.131 0 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1.3460e+01 on 9 degrees of freedom
## Residual deviance: 4.6057e-10 on 4 degrees of freedom
## AIC: 12
##
## Number of Fisher Scoring iterations: 24
library(car)
## Warning: package 'car' was built under R version 3.5.1
## Loading required package: carData
library(MASS)
stepAIC(model)
## Start: AIC=12
## Result ~ Year + Amount.Spent + factor(Popularity.Rank)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - Amount.Spent 1 0.0000 10.000
## <none> 0.0000 12.000
## - Year 1 3.8177 13.818
## - factor(Popularity.Rank) 3 9.8091 15.809
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=10
## Result ~ Year + factor(Popularity.Rank)
##
## Df Deviance AIC
## <none> 0.0000 10.000
## - Year 1 3.8191 11.819
## - factor(Popularity.Rank) 3 10.7150 14.715
##
## Call: glm(formula = Result ~ Year + factor(Popularity.Rank), family = "binomial",
## data = mydata)
##
## Coefficients:
## (Intercept) Year
## -212.991 4.663
## factor(Popularity.Rank)2 factor(Popularity.Rank)3
## 55.654 40.360
## factor(Popularity.Rank)4
## -44.050
##
## Degrees of Freedom: 9 Total (i.e. Null); 5 Residual
## Null Deviance: 13.46
## Residual Deviance: 4.632e-10 AIC: 10
avPlots(model)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

# Prediction Model
prob <- predict(model, mydata, type = "response")
prob <- as.data.frame(prob)
final <- cbind(mydata, prob)
confusion <- table(prob>0.5, Result)
confusion
## Result
## 0 1
## FALSE 4 0
## TRUE 0 6
accuracy <- sum(diag(confusion)/sum(confusion))
accuracy
## [1] 1
library(ROCR)
## Warning: package 'ROCR' was built under R version 3.5.1
## Loading required package: gplots
## Warning: package 'gplots' was built under R version 3.5.1
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
rocrpred <- prediction(prob, Result)
rocrperf <- performance(rocrpred, 'tpr', 'fpr')
str(rocrperf)
## Formal class 'performance' [package "ROCR"] with 6 slots
## ..@ x.name : chr "False positive rate"
## ..@ y.name : chr "True positive rate"
## ..@ alpha.name : chr "Cutoff"
## ..@ x.values :List of 1
## .. ..$ : num [1:10] 0 0 0 0 0 0 0 0.25 0.5 1
## ..@ y.values :List of 1
## .. ..$ : num [1:10] 0 0.167 0.333 0.5 0.667 ...
## ..@ alpha.values:List of 1
## .. ..$ : num [1:10] Inf 1 1 1 1 ...
plot(rocrperf,colorize=T,text.adj=c(-0.2,1.7))
