# Election Result Prediction
# Load the raw election data from the local CSV file.
dataset <- read.csv(
  "C:\\Users\\RISHI RAHUL\\Desktop\\Dataset\\6 Assignments\\5 Logisitc Regression\\election_data.csv"
)

# Count the missing cells across the whole data frame.
sum(is.na(dataset))
## [1] 5

# Keep only the rows that have no missing values.
dataset <- na.omit(dataset)

# Rows and columns remaining after the NA rows were dropped.
c(nrow(dataset), ncol(dataset))
## [1] 10 5
names(dataset)
## [1] "Election.id" "Result" "Year" "Amount.Spent"
## [5] "Popularity.Rank"

# Drop the first column (Election.id): it is only a row identifier and
# must not enter the model as a predictor.
dataset <- dataset[, -1]
# Fit a logistic regression (binomial GLM, logit link) of Result on every
# remaining column of the dataset.
model <- glm(Result ~ ., family = "binomial", data = dataset)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# NOTE: the two warnings above are the usual signature of (quasi-)complete
# separation: the predictors split the outcome perfectly on this small sample,
# so the coefficient estimates are not finite and should not be interpreted.

# Odds ratios are the exponentiated model coefficients.
exp(model$coefficients)
## (Intercept) Year Amount.Spent Popularity.Rank
## 4.016574e+27 7.465078e+01 1.610260e+02 4.541122e-36
# Predicted probability of Result == 1 for each row of the data the model
# was fitted on (in-sample predictions, on the response scale).
prob <- predict(model, newdata = dataset, type = "response")

# Coefficient table, deviances and iteration count of the fitted model.
summary(model)
##
## Call:
## glm(formula = Result ~ ., family = "binomial", data = dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.291e-05 -2.110e-08 2.110e-08 2.110e-08 1.829e-05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 63.560 629369.770 0.000 1.000
## Year 4.313 12237.762 0.000 1.000
## Amount.Spent 5.082 209842.916 0.000 1.000
## Popularity.Rank -81.380 122426.442 -0.001 0.999
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1.3460e+01 on 9 degrees of freedom
## Residual deviance: 6.5897e-10 on 6 degrees of freedom
## AIC: 8
##
## Number of Fisher Scoring iterations: 25
# Confusion matrix at a probability threshold of 0.5.
# Rows are the predicted class (TRUE = predicted Result of 1), columns are
# the actual Result. The cut-off uses `>=` so that it agrees with the
# pred_values / yes_no classification made further down the script.
# Both factors carry explicit levels so the table is always 2 x 2, even when
# every prediction falls on one side of the threshold; otherwise diag() would
# pick the wrong cells and the accuracy below would be silently wrong.
confusion <- table(
  factor(prob >= 0.5, levels = c(FALSE, TRUE)),
  factor(dataset$Result, levels = c(0, 1))
)
confusion
##
## 0 1
## FALSE 4 0
## TRUE 0 6
# Model accuracy: correctly classified observations (the diagonal) divided by
# all observations. This is in-sample accuracy, measured on the same rows the
# model was fitted on.
Accuracy <- sum(diag(confusion)) / sum(confusion)
Accuracy # 100%
## [1] 1
# Turn the predicted probabilities into class labels at the 0.5 threshold:
# once as a numeric 1/0 flag and once as a "yes"/"no" string.
pred_values <- ifelse(prob >= 0.5, 1, 0)
yes_no <- ifelse(prob >= 0.5, "yes", "no")

# Store the probabilities and both label encodings as new dataset columns.
dataset$prob <- prob
dataset$pred_values <- pred_values
dataset$yes_no <- yes_no

# Cross-tabulate the actual Result (rows) against the predicted class (columns).
table(dataset$Result, dataset$pred_values)
##
## 0 1
## 0 4 0
## 1 0 6