Title: “logistic_regression”
Find the current Directory
# Show the directory that relative file paths are currently resolved against.
print(getwd())
## [1] "C:/Users/Admin/Documents/R studio STTP"
Set the working directory to the project folder
# Switch the session's working directory so the relative CSV paths resolve.
# NOTE(review): this is an absolute, machine-specific Windows path; it will
# fail on any other computer. Consider a project-relative path instead.
setwd(dir = "c:\\Users\\Admin\\Documents\\R studio STTP")
Load Dataset
# Read both CSV files from the current working directory into data frames.
crash_test <- read.csv(file = "crashTest_1_TEST.csv")
crash_train <- read.csv(file = "crashTest_1.csv")
First few rows of the dataset
# Preview the first six rows (the default for head()) of the training data.
head(crash_train, n = 6L)
## CarID ManHI ManBI IntI HVACi Safety CarType
## 1 1 -5.27 -1.30 2.86 -4.85 4.04 SUV
## 2 2 -4.82 -5.38 9.72 -0.97 -4.57 Hatchback
## 3 3 9.57 -7.50 -7.61 1.33 -5.10 Hatchback
## 4 4 2.84 -2.85 0.92 5.51 -6.64 Hatchback
## 5 5 0.00 2.68 -4.15 0.85 5.58 SUV
## 6 6 0.40 6.34 0.83 5.03 -8.10 SUV
view Dataset
# Open the training data in the spreadsheet-style viewer. View() needs an
# interactive GUI session, so skip it when the script is run or rendered
# non-interactively instead of failing there.
if (interactive()) {
  View(crash_train)
}
View Structure of the data
# Compactly list each column of the training data with its type and leading
# values.
str(object = crash_train)
## 'data.frame': 80 obs. of 7 variables:
## $ CarID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ ManHI : num -5.27 -4.82 9.57 2.84 0 0.4 5.94 5.78 0.86 7.36 ...
## $ ManBI : num -1.3 -5.38 -7.5 -2.85 2.68 6.34 3.14 -1.75 -4.32 7.42 ...
## $ IntI : num 2.86 9.72 -7.61 0.92 -4.15 0.83 -6.65 -6.85 8.1 0.27 ...
## $ HVACi : num -4.85 -0.97 1.33 5.51 0.85 5.03 6.62 0.73 -8.96 -8.62 ...
## $ Safety : num 4.04 -4.57 -5.1 -6.64 5.58 -8.1 -1.32 5.5 3.1 3.08 ...
## $ CarType: chr "SUV" "Hatchback" "Hatchback" "Hatchback" ...
Summary statistics of the dataset (five-number summary plus the mean for each numeric column)
# Per-column summary: quartiles, extremes and mean for numeric columns,
# length/class/mode for character columns.
summary(object = crash_train)
## CarID ManHI ManBI IntI
## Min. : 1.00 Min. :-9.9300 Min. :-9.9400 Min. :-9.9900
## 1st Qu.:20.75 1st Qu.:-5.1950 1st Qu.:-5.7050 1st Qu.:-5.5725
## Median :40.50 Median : 0.6350 Median :-1.8150 Median :-0.4150
## Mean :40.50 Mean :-0.0935 Mean :-0.9277 Mean :-0.1349
## 3rd Qu.:60.25 3rd Qu.: 5.0500 3rd Qu.: 3.4175 3rd Qu.: 4.9775
## Max. :80.00 Max. : 9.5700 Max. : 9.6100 Max. : 9.7200
## HVACi Safety CarType
## Min. :-9.8200 Min. :-9.8000 Length:80
## 1st Qu.:-5.6750 1st Qu.:-4.6775 Class :character
## Median : 0.8700 Median : 0.8300 Mode :character
## Mean : 0.1197 Mean : 0.5437
## 3rd Qu.: 5.0625 3rd Qu.: 4.6225
## Max. : 9.8900 Max. : 9.9900
Encode the class attribute “CarType” as a binary (0/1) response: 1 for "SUV", 0 otherwise
# Numeric 0/1 response for the logistic model: 1 where CarType is "SUV",
# 0 for every other car type (NA stays NA).
crash_train$CarType_binary <- as.numeric(crash_train$CarType == "SUV")
Fit logistic regression model
# Fit a logistic regression of the binary SUV indicator on the five numeric
# index features.
#
# The previous formula `CarType_binary ~ .` also used CarType (the very label
# CarType_binary is derived from) and CarID (a row identifier) as predictors.
# Feeding the label back in as a predictor leaks the answer: the classes are
# perfectly separated, the fit runs into the iteration cap, and the
# coefficients and standard errors are meaningless.
#
# NOTE(review): the model summary, predictions and confusion matrix recorded
# in this document were produced with the leaky formula and need to be
# regenerated after this change.
lfit <- glm(
  CarType_binary ~ ManHI + ManBI + IntI + HVACi + Safety,
  family = binomial,
  data = crash_train
)
Understand the fitted model: summary of lfit
# Print the coefficient table, deviances, AIC and iteration count of the fit.
summary(object = lfit)
##
## Call:
## glm(formula = CarType_binary ~ ., family = binomial, data = crash_train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.657e+01 9.787e+04 0 1
## CarID 3.641e-13 1.765e+03 0 1
## ManHI -3.035e-12 7.249e+03 0 1
## ManBI 3.271e-13 7.978e+03 0 1
## IntI -3.977e-12 8.685e+03 0 1
## HVACi -4.546e-12 1.009e+04 0 1
## Safety -1.522e-12 7.770e+03 0 1
## CarTypeSUV 5.313e+01 1.347e+05 0 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1.0585e+02 on 79 degrees of freedom
## Residual deviance: 4.6413e-10 on 72 degrees of freedom
## AIC: 16
##
## Number of Fisher Scoring iterations: 25
# Maximum likelihood estimation: glm() finds the coefficients by Fisher scoring, an iterative procedure closely related to the Newton-Raphson method
Find the probability
# In-sample predictions on the response scale, i.e. the fitted probability
# that CarType_binary equals 1 for each training row.
logtrain <- predict(object = lfit, type = "response")
plotting the probability
# Plot the fitted probabilities against observation index.
plot(logtrain)
# Average fitted probability within each car type.
tapply(X = logtrain, INDEX = crash_train$CarType, FUN = mean)
## Hatchback SUV
## 2.900701e-12 1.000000e+00
# Predict the probability of "SUV" for every row of the test set.
#
# R argument names are case-sensitive: the previous `Type='response'` did not
# match predict()'s `type` argument and was silently ignored, so predictions
# came back on the default link scale (log-odds) rather than as
# probabilities. The 0.5 cut-off applied further down is only meaningful on
# the probability scale.
#
# NOTE(review): the values printed below in this document (about +/-26.57)
# are log-odds from the old call; re-run to refresh the recorded output.
logispred <- predict(lfit, newdata = crash_test, type = "response")
# Plot the test-set predictions against row index, then print the values.
plot(logispred)
print(logispred)
## 1 2 3 4 5 6 7 8
## -26.56607 26.56607 -26.56607 -26.56607 -26.56607 -26.56607 26.56607 26.56607
## 9 10 11 12 13 14 15 16
## 26.56607 26.56607 26.56607 -26.56607 26.56607 26.56607 26.56607 26.56607
## 17 18 19 20
## -26.56607 -26.56607 -26.56607 -26.56607
Let's classify each test point as Hatchback or SUV by applying a threshold to the prediction
# Turn each prediction into a class label stored in a new "logispred" column
# of crash_test: values at or below 0.5 become "Hatchback", values above 0.5
# become "SUV".
at_or_below_cutoff <- logispred <= 0.5
crash_test[at_or_below_cutoff, "logispred"] <- "Hatchback"
crash_test[!at_or_below_cutoff, "logispred"] <- "SUV"
# Cross-tabulate predicted labels (rows) against the true car type (columns)
# and report accuracy statistics, treating "Hatchback" as the positive class.
# Columns are addressed by name rather than by position (previously 8 and 7)
# so the call keeps working if the column order of crash_test changes, and
# confusionMatrix is namespace-qualified because it comes from the caret
# package, which this script never attaches explicitly.
caret::confusionMatrix(
  table(crash_test$logispred, crash_test$CarType),
  positive = "Hatchback"
)
## Confusion Matrix and Statistics
##
##
## Hatchback SUV
## Hatchback 10 0
## SUV 0 10
##
## Accuracy : 1
## 95% CI : (0.8316, 1)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : 9.537e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0
## Specificity : 1.0
## Pos Pred Value : 1.0
## Neg Pred Value : 1.0
## Prevalence : 0.5
## Detection Rate : 0.5
## Detection Prevalence : 0.5
## Balanced Accuracy : 1.0
##
## 'Positive' Class : Hatchback
##