install.packages (“MASS”) install.packages (“caret”) install.packages (“car”) install.packages (“carData”)
elec <- read.csv("D:\\Users\\jayapate\\Downloads\\election_data.csv")
elec <- elec[-1,]
View(elec)
attach(elec)
summary(elec)
## Election.id Result Year Amount.Spent
## Min. :122.0 Min. :0.0 Min. :32.00 Min. :2.930
## 1st Qu.:202.2 1st Qu.:0.0 1st Qu.:39.25 1st Qu.:3.618
## Median :362.5 Median :1.0 Median :43.00 Median :4.005
## Mean :451.6 Mean :0.6 Mean :43.30 Mean :4.229
## 3rd Qu.:710.2 3rd Qu.:1.0 3rd Qu.:49.50 3rd Qu.:4.470
## Max. :965.0 Max. :1.0 Max. :52.00 Max. :6.320
## Popularity.Rank
## Min. :1.00
## 1st Qu.:2.00
## Median :3.00
## Mean :2.70
## 3rd Qu.:3.75
## Max. :4.00
elec1 <- elec
#Finding the Linear Regression
colnames(elec1)
## [1] "Election.id" "Result" "Year" "Amount.Spent"
## [5] "Popularity.Rank"
elec1 <- lm(Result~Year+Amount.Spent+Popularity.Rank)
summary(elec1)
##
## Call:
## lm(formula = Result ~ Year + Amount.Spent + Popularity.Rank)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36265 -0.15265 -0.09902 0.08992 0.55615
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.65329 1.31682 0.496 0.6375
## Year 0.01021 0.02151 0.475 0.6517
## Amount.Spent 0.07523 0.12208 0.616 0.5604
## Popularity.Rank -0.30137 0.13057 -2.308 0.0604 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3784 on 6 degrees of freedom
## Multiple R-squared: 0.642, Adjusted R-squared: 0.463
## F-statistic: 3.586 on 3 and 6 DF, p-value: 0.08576
#Linear Regression cannot be applied. So go for Logistic Regression
elec2 <- glm(Result~Year+Amount.Spent+Popularity.Rank)
summary(elec2)
##
## Call:
## glm(formula = Result ~ Year + Amount.Spent + Popularity.Rank)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.36265 -0.15265 -0.09902 0.08992 0.55615
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.65329 1.31682 0.496 0.6375
## Year 0.01021 0.02151 0.475 0.6517
## Amount.Spent 0.07523 0.12208 0.616 0.5604
## Popularity.Rank -0.30137 0.13057 -2.308 0.0604 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1432053)
##
## Null deviance: 2.40000 on 9 degrees of freedom
## Residual deviance: 0.85923 on 6 degrees of freedom
## AIC: 13.836
##
## Number of Fisher Scoring iterations: 2
library ("car")
## Warning: package 'car' was built under R version 3.5.1
## Loading required package: carData
library ("caret")
## Warning: package 'caret' was built under R version 3.5.1
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.1
library ("MASS")
## Warning: package 'MASS' was built under R version 3.5.1
x<-stepAIC(elec2)
## Start: AIC=13.84
## Result ~ Year + Amount.Spent + Popularity.Rank
##
## Df Deviance AIC
## - Year 1 0.89152 12.205
## - Amount.Spent 1 0.91361 12.449
## <none> 0.85923 13.836
## - Popularity.Rank 1 1.62217 18.191
##
## Step: AIC=12.2
## Result ~ Amount.Spent + Popularity.Rank
##
## Df Deviance AIC
## - Amount.Spent 1 0.94215 10.757
## <none> 0.89152 12.205
## - Popularity.Rank 1 2.18851 19.185
##
## Step: AIC=10.76
## Result ~ Popularity.Rank
##
## Df Deviance AIC
## <none> 0.94215 10.757
## - Popularity.Rank 1 2.40000 18.108
vif(elec2)
## Year Amount.Spent Popularity.Rank
## 1.389879 1.043188 1.440479
coef(elec2)
## (Intercept) Year Amount.Spent Popularity.Rank
## 0.65329307 0.01021448 0.07522508 -0.30137290
#confusion Matrix Table
prob <- predict(elec2, type=c("response"),elec)
prob <- as.data.frame(prob)
final <- cbind(prob, elec)
confusion <- table(prob > 0.5, elec$Result)
table (prob > 0.5)
##
## FALSE TRUE
## 5 5
#Now as only Popularity Rank is considered, build Logistic Regression on Popularity Rank
elec3 <- glm(Result~Popularity.Rank)
summary(elec3)
##
## Call:
## glm(formula = Result ~ Popularity.Rank)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.4959 -0.1797 -0.1488 0.1570 0.5041
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.53719 0.28763 5.344 0.000691 ***
## Popularity.Rank -0.34711 0.09866 -3.518 0.007865 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1177686)
##
## Null deviance: 2.40000 on 9 degrees of freedom
## Residual deviance: 0.94215 on 8 degrees of freedom
## AIC: 10.757
##
## Number of Fisher Scoring iterations: 2
x<-stepAIC(elec3)
## Start: AIC=10.76
## Result ~ Popularity.Rank
##
## Df Deviance AIC
## <none> 0.94215 10.757
## - Popularity.Rank 1 2.40000 18.108
vif(elec1)
## Year Amount.Spent Popularity.Rank
## 1.389879 1.043188 1.440479
coef(elec3)
## (Intercept) Popularity.Rank
## 1.5371901 -0.3471074
#confusion Matrix Table
prob1 <- predict(elec3, type=c("response"),elec)
prob1 <- as.data.frame(prob1)
final <- cbind(prob1, elec)
confusion <- table(prob1 > 0.5, elec$Result)
confusion
##
## 0 1
## FALSE 4 2
## TRUE 0 4
table (prob1 > 0.5)
##
## FALSE TRUE
## 6 4
accuracy <- sum(diag(confusion)/sum(confusion))
accuracy
## [1] 0.8