# Logistic Regression

# Assignment 3

# Election Result Prediction

# Load the election data from the CSV file at the absolute local path below.
dataset <- read.csv(
  file = "C:\\Users\\RISHI RAHUL\\Desktop\\Dataset\\6 Assignments\\5 Logisitc Regression\\election_data.csv"
)

# Count the missing cells across the whole data frame before cleaning.
sum(is.na(dataset))
## [1] 5
# Keep only the rows that contain no missing values.
dataset <- na.omit(dataset)

dim(dataset)
## [1] 10  5
colnames(dataset)
## [1] "Election.id"     "Result"          "Year"            "Amount.Spent"   
## [5] "Popularity.Rank"
# Drop the Election.id index column so it does not enter the model as a
# predictor. It is removed by name rather than by position so that re-running
# this line cannot silently discard another column (such as Result).
dataset <- dataset[, names(dataset) != "Election.id", drop = FALSE]

# Fit a logistic regression (binomial GLM) of Result on every other column
# that is still present in `dataset`.
# NOTE(review): the warnings recorded below, the very large standard errors in
# the summary and the near-zero residual deviance are consistent with
# (quasi-)complete separation on these 10 rows -- confirm before interpreting
# the coefficients or the odds ratios derived from them.
model <- glm(
  formula = Result ~ .,
  family = "binomial",
  data = dataset
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Odds ratios: exponentiate the fitted log-odds coefficients.
exp(model$coefficients)
##     (Intercept)            Year    Amount.Spent Popularity.Rank 
##    4.016574e+27    7.465078e+01    1.610260e+02    4.541122e-36
# Fitted probabilities on the response scale for the rows used in the fit.
prob <- predict(model, newdata = dataset, type = "response")
summary(model)
## 
## Call:
## glm(formula = Result ~ ., family = "binomial", data = dataset)
## 
## Deviance Residuals: 
##        Min          1Q      Median          3Q         Max  
## -1.291e-05  -2.110e-08   2.110e-08   2.110e-08   1.829e-05  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)
## (Intercept)         63.560 629369.770   0.000    1.000
## Year                 4.313  12237.762   0.000    1.000
## Amount.Spent         5.082 209842.916   0.000    1.000
## Popularity.Rank    -81.380 122426.442  -0.001    0.999
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.3460e+01  on 9  degrees of freedom
## Residual deviance: 6.5897e-10  on 6  degrees of freedom
## AIC: 8
## 
## Number of Fisher Scoring iterations: 25
# Confusion matrix at a 0.5 probability threshold.
# Rows are the predicted class (FALSE/TRUE); columns are the observed Result
# (0/1). Both dimensions get explicit factor levels so the table is always
# 2 x 2: without them, a model that predicts only one class produces a 1 x 2
# table and diag() no longer picks out the correctly classified counts.
# `>=` is used so the cut-off agrees with the one applied for pred_values and
# yes_no further down the script.
confusion <- table(
  factor(prob >= 0.5, levels = c(FALSE, TRUE)),
  factor(dataset$Result, levels = c(0, 1))
)
confusion
##        
##         0 1
##   FALSE 4 0
##   TRUE  0 6
# Model accuracy: correctly classified rows (the diagonal) over all rows.
Accuracy <- sum(diag(confusion)) / sum(confusion)
Accuracy # 100%
## [1] 1
# Hard class labels at the 0.5 cut-off: rows whose probability is below 0.5
# get 0 / "no", every other row gets 1 / "yes".
pred_values <- ifelse(prob < 0.5, 0, 1)
yes_no <- ifelse(prob < 0.5, "no", "yes")

# Append the fitted probability and both label encodings as new columns.
dataset[, "prob"] <- prob
dataset[, "pred_values"] <- pred_values
dataset[, "yes_no"] <- yes_no


# Cross-tabulate the observed Result (rows) against the predicted class (columns).
table(dataset[["Result"]], dataset[["pred_values"]])
##    
##     0 1
##   0 4 0
##   1 0 6