logistic_regression

# Print the current working directory.
getwd()

## [1] "C:/Users/Admin/Documents/R studio STTP"

# Point the session at the project folder so the relative CSV file names used
# by the read.csv() calls below resolve.
# NOTE(review): this is a hard-coded, machine-specific absolute path.
setwd("c:\\Users\\Admin\\Documents\\R studio STTP")

load dataset

# Read the training and test sets from CSV files in the working directory.
crash_train <- read.csv(file = "crashTest_1.csv")
crash_test <- read.csv(file = "crashTest_1_TEST.csv")

Alternative: load the file with an absolute path if the working directory is not set

# Read the training CSV via an absolute path instead of relying on the
# working directory.
# NOTE(review): the result is stored in `data`, which the rest of the visible
# script never uses (later steps work on crash_train) — confirm intent.
data <- read.csv("c:/Users/Admin/Documents/R studio STTP/crashTest_1.csv")

First few rows of the dataset

# Show the first ten rows of the training data.
head(crash_train, n = 10)

##    ManHI ManBI  IntI HVACi Safety   CarType
## 1  -5.27 -1.30  2.86 -4.85   4.04       SUV
## 2  -4.82 -5.38  9.72 -0.97  -4.57 Hatchback
## 3   9.57 -7.50 -7.61  1.33  -5.10 Hatchback
## 4   2.84 -2.85  0.92  5.51  -6.64 Hatchback
## 5   0.00  2.68 -4.15  0.85   5.58       SUV
## 6   0.40  6.34  0.83  5.03  -8.10       SUV
## 7   5.94  3.14 -6.65  6.62  -1.32 Hatchback
## 8   5.78 -1.75 -6.85  0.73   5.50 Hatchback
## 9   0.86 -4.32  8.10 -8.96   3.10 Hatchback
## 10  7.36  7.42  0.27 -8.62   3.08       SUV

# Open the training data in R's spreadsheet-style data viewer.
View(crash_train)

View Structure of the data

# Compact overview of the training data: dimensions, column names and types.
str(crash_train)

## 'data.frame':    80 obs. of  6 variables:
##  $ ManHI  : num  -5.27 -4.82 9.57 2.84 0 0.4 5.94 5.78 0.86 7.36 ...
##  $ ManBI  : num  -1.3 -5.38 -7.5 -2.85 2.68 6.34 3.14 -1.75 -4.32 7.42 ...
##  $ IntI   : num  2.86 9.72 -7.61 0.92 -4.15 0.83 -6.65 -6.85 8.1 0.27 ...
##  $ HVACi  : num  -4.85 -0.97 1.33 5.51 0.85 5.03 6.62 0.73 -8.96 -8.62 ...
##  $ Safety : num  4.04 -4.57 -5.1 -6.64 5.58 -8.1 -1.32 5.5 3.1 3.08 ...
##  $ CarType: chr  "SUV" "Hatchback" "Hatchback" "Hatchback" ...

Five-number summary (plus the mean) of the dataset

# Per-column summary statistics of the training data.
summary(crash_train)

##      ManHI             ManBI              IntI             HVACi        
##  Min.   :-9.9300   Min.   :-9.9400   Min.   :-9.9900   Min.   :-9.8200  
##  1st Qu.:-5.1950   1st Qu.:-5.7050   1st Qu.:-5.5725   1st Qu.:-5.6750  
##  Median : 0.6350   Median :-1.8150   Median :-0.4150   Median : 0.8700  
##  Mean   :-0.0935   Mean   :-0.9277   Mean   :-0.1349   Mean   : 0.1197  
##  3rd Qu.: 5.0500   3rd Qu.: 3.4175   3rd Qu.: 4.9775   3rd Qu.: 5.0625  
##  Max.   : 9.5700   Max.   : 9.6100   Max.   : 9.7200   Max.   : 9.8900  
##      Safety          CarType         
##  Min.   :-9.8000   Length:80         
##  1st Qu.:-4.6775   Class :character  
##  Median : 0.8300   Mode  :character  
##  Mean   : 0.5437                     
##  3rd Qu.: 4.6225                     
##  Max.   : 9.9900

Convert CarType to a binary indicator (1 = SUV, 0 = otherwise)

# Encode the response as a numeric 0/1 indicator: 1 where CarType is "SUV",
# 0 for any other value. Coercing the logical comparison yields the same
# double vector (NA stays NA) as an explicit ifelse(..., 1, 0).
crash_train$CarType_binary <- as.numeric(crash_train$CarType == "SUV")
crash_test$CarType_binary <- as.numeric(crash_test$CarType == "SUV")

Fit the logistic regression model

# Fit a binomial GLM (logistic regression) of the SUV indicator on the five
# numeric predictors, using the training data.
# NOTE(review): the warnings printed below (algorithm did not converge; fitted
# probabilities numerically 0 or 1), together with the very large standard
# errors and near-zero residual deviance in summary(lfit), suggest complete or
# quasi-complete separation of the classes. The coefficient estimates and
# p-values are then probably not reliable — TODO confirm and consider a
# penalized fit.
lfit <- glm(CarType_binary ~ ManHI + ManBI + IntI + HVACi + Safety,
            data = crash_train, family = "binomial")

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary of lfit

# Print the coefficient table, deviances, AIC and iteration count of the fit.
summary(lfit)

## 
## Call:
## glm(formula = CarType_binary ~ ManHI + ManBI + IntI + HVACi + 
##     Safety, family = "binomial", data = crash_train)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   -22.76   12007.54  -0.002    0.998
## ManHI         -13.48    3077.29  -0.004    0.997
## ManBI          36.02    7221.18   0.005    0.996
## IntI          -44.90    8853.08  -0.005    0.996
## HVACi         -58.50   11461.92  -0.005    0.996
## Safety        -27.36    5396.42  -0.005    0.996
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.0585e+02  on 79  degrees of freedom
## Residual deviance: 5.3590e-08  on 74  degrees of freedom
## AIC: 12
## 
## Number of Fisher Scoring iterations: 25

Predict on the test data and classify using a 0.5 probability threshold

# Ground-truth labels for the test set, rebuilt from the 0/1 indicator.
actual_test_class <- ifelse(
  crash_test$CarType_binary == 1,
  "SUV",
  "Hatchback"
)

# Predicted probabilities from the fitted model for the test rows.
logis_test <- predict(object = lfit, newdata = crash_test, type = "response")

# Label a row "SUV" when its predicted probability exceeds 0.5.
predicted_test_class <- ifelse(
  logis_test > 0.5,
  "SUV",
  "Hatchback"
)

Import the library for the confusion matrix

library(caret)

## Loading required package: ggplot2

## Loading required package: lattice

# Cross-tabulate predicted vs. actual classes on the test set, treating "SUV"
# as the positive class.
# Both factors are built with the same explicit level set: a bare factor()
# keeps only the values that actually occur, so if the model predicted a
# single class the two factors would have mismatched levels.
confusionMatrix(
  data = factor(predicted_test_class, levels = c("Hatchback", "SUV")),
  reference = factor(actual_test_class, levels = c("Hatchback", "SUV")),
  positive = "SUV"
)

## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Hatchback SUV
##   Hatchback        10   1
##   SUV               0   9
##                                           
##                Accuracy : 0.95            
##                  95% CI : (0.7513, 0.9987)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : 2.003e-05       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9000          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.9091          
##              Prevalence : 0.5000          
##          Detection Rate : 0.4500          
##    Detection Prevalence : 0.4500          
##       Balanced Accuracy : 0.9500          
##                                           
##        'Positive' Class : SUV             
##

Create a new entry to test the model

# One-row data frame holding the predictor values of a new car to classify.
new_car <- data.frame(
  ManHI = 1.94, ManBI = 2.21, IntI = 3.38,
  HVACi = 1.78, Safety = -7.19
)

predict result for new entry

# Predicted probability for the new entry.
logis_new <- predict(object = lfit, newdata = new_car, type = "response")

# Same 0.5 cut-off as for the test set; ifelse() keeps the row name on the
# result.
predicted_new <- ifelse(
  logis_new > 0.5,
  "SUV",
  "Hatchback"
)

display result

# Show the predicted class for the new entry.
print(predicted_new)

##           1 
## "Hatchback"

logistic_regression_demo1

Ashwini Deshmukh

2025-06-19