# Show the current working directory (recorded output on the next line).
getwd()
## [1] "C:/Users/Admin/Documents/R studio STTP"
# Point the working directory at the project folder so that the relative file
# names passed to read.csv() further down resolve there.
# NOTE(review): absolute, machine-specific path - confirm before running on
# another machine.
setwd("c:\\Users\\Admin\\Documents\\R studio STTP")
load dataset
# Read the training table and the test table from CSV files located in the
# current working directory.
crash_train <- read.csv(file = "crashTest_1.csv")
crash_test <- read.csv(file = "crashTest_1_TEST.csv")
Alternatively, if the working directory is not set, read the file using its full path
# Same training CSV, addressed by its absolute path and stored in `data`.
# NOTE(review): the rest of the visible script uses `crash_train`, not `data`.
data <- read.csv(
  file = "c:/Users/Admin/Documents/R studio STTP/crashTest_1.csv"
)
First few rows of the dataset
# Print the first ten rows of the training data (recorded output below).
head(crash_train, n = 10)
## ManHI ManBI IntI HVACi Safety CarType
## 1 -5.27 -1.30 2.86 -4.85 4.04 SUV
## 2 -4.82 -5.38 9.72 -0.97 -4.57 Hatchback
## 3 9.57 -7.50 -7.61 1.33 -5.10 Hatchback
## 4 2.84 -2.85 0.92 5.51 -6.64 Hatchback
## 5 0.00 2.68 -4.15 0.85 5.58 SUV
## 6 0.40 6.34 0.83 5.03 -8.10 SUV
## 7 5.94 3.14 -6.65 6.62 -1.32 Hatchback
## 8 5.78 -1.75 -6.85 0.73 5.50 Hatchback
## 9 0.86 -4.32 8.10 -8.96 3.10 Hatchback
## 10 7.36 7.42 0.27 -8.62 3.08 SUV
# View() opens an interactive spreadsheet-style viewer, so only call it when a
# user is at the console; batch runs (Rscript, knitting) skip it.
if (interactive()) {
  View(crash_train)
}
View Structure of the data
# Compact overview of the training data: column names, types and first values.
str(object = crash_train)
## 'data.frame': 80 obs. of 6 variables:
## $ ManHI : num -5.27 -4.82 9.57 2.84 0 0.4 5.94 5.78 0.86 7.36 ...
## $ ManBI : num -1.3 -5.38 -7.5 -2.85 2.68 6.34 3.14 -1.75 -4.32 7.42 ...
## $ IntI : num 2.86 9.72 -7.61 0.92 -4.15 0.83 -6.65 -6.85 8.1 0.27 ...
## $ HVACi : num -4.85 -0.97 1.33 5.51 0.85 5.03 6.62 0.73 -8.96 -8.62 ...
## $ Safety : num 4.04 -4.57 -5.1 -6.64 5.58 -8.1 -1.32 5.5 3.1 3.08 ...
## $ CarType: chr "SUV" "Hatchback" "Hatchback" "Hatchback" ...
Five-number summary (plus the mean) of the dataset
# Per-column descriptive statistics of the training data.
summary(object = crash_train)
## ManHI ManBI IntI HVACi
## Min. :-9.9300 Min. :-9.9400 Min. :-9.9900 Min. :-9.8200
## 1st Qu.:-5.1950 1st Qu.:-5.7050 1st Qu.:-5.5725 1st Qu.:-5.6750
## Median : 0.6350 Median :-1.8150 Median :-0.4150 Median : 0.8700
## Mean :-0.0935 Mean :-0.9277 Mean :-0.1349 Mean : 0.1197
## 3rd Qu.: 5.0500 3rd Qu.: 3.4175 3rd Qu.: 4.9775 3rd Qu.: 5.0625
## Max. : 9.5700 Max. : 9.6100 Max. : 9.7200 Max. : 9.8900
## Safety CarType
## Min. :-9.8000 Length:80
## 1st Qu.:-4.6775 Class :character
## Median : 0.8300 Mode :character
## Mean : 0.5437
## 3rd Qu.: 4.6225
## Max. : 9.9900
Convert CarType to a binary indicator (SUV = 1, otherwise 0)
# Add a numeric 0/1 label column to both tables: 1 where CarType is "SUV",
# 0 for every other value.
crash_train[["CarType_binary"]] <- ifelse(
  test = crash_train[["CarType"]] == "SUV", yes = 1, no = 0
)
crash_test[["CarType_binary"]] <- ifelse(
  test = crash_test[["CarType"]] == "SUV", yes = 1, no = 0
)
Fit a logistic regression model
# Logistic regression (binomial GLM) of the 0/1 SUV indicator on the five
# numeric predictors, fitted on the training data.
# NOTE(review): the recorded run reports non-convergence and fitted
# probabilities of 0 or 1, and the summary shows a near-zero residual deviance
# with very large standard errors. This looks like (quasi-)complete separation
# of the two classes, so the coefficients and p-values should not be
# interpreted - confirm, and consider a penalized fit.
lfit <- glm(CarType_binary ~ ManHI + ManBI + IntI + HVACi + Safety,
data = crash_train, family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary of lfit
# Coefficient table and deviance statistics of the fitted model.
summary(object = lfit)
##
## Call:
## glm(formula = CarType_binary ~ ManHI + ManBI + IntI + HVACi +
## Safety, family = "binomial", data = crash_train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -22.76 12007.54 -0.002 0.998
## ManHI -13.48 3077.29 -0.004 0.997
## ManBI 36.02 7221.18 0.005 0.996
## IntI -44.90 8853.08 -0.005 0.996
## HVACi -58.50 11461.92 -0.005 0.996
## Safety -27.36 5396.42 -0.005 0.996
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1.0585e+02 on 79 degrees of freedom
## Residual deviance: 5.3590e-08 on 74 degrees of freedom
## AIC: 12
##
## Number of Fisher Scoring iterations: 25
Predict on the test data and apply a 0.5 probability threshold
# Predicted probability of the positive outcome (CarType_binary == 1) for each
# row of the test table.
logis_test <- predict(object = lfit, newdata = crash_test, type = "response")
# Turn probabilities into labels: above 0.5 becomes "SUV", else "Hatchback".
predicted_test_class <- ifelse(
  test = logis_test > 0.5, yes = "SUV", no = "Hatchback"
)
# Rebuild the true labels from the 0/1 indicator so both vectors share the
# same two label strings.
actual_test_class <- ifelse(
  test = crash_test[["CarType_binary"]] == 1, yes = "SUV", no = "Hatchback"
)
Import the library for the confusion matrix
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Cross-tabulate predicted against actual labels, treating "SUV" as the
# positive class.
# Both factors get the same explicit level set: factor() on its own derives
# levels from the values present, so a prediction vector containing a single
# class would yield a one-level factor that no longer matches the reference.
class_levels <- c("Hatchback", "SUV")
confusionMatrix(
  data = factor(predicted_test_class, levels = class_levels),
  reference = factor(actual_test_class, levels = class_levels),
  positive = "SUV"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Hatchback SUV
## Hatchback 10 1
## SUV 0 9
##
## Accuracy : 0.95
## 95% CI : (0.7513, 0.9987)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : 2.003e-05
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.9091
## Prevalence : 0.5000
## Detection Rate : 0.4500
## Detection Prevalence : 0.4500
## Balanced Accuracy : 0.9500
##
## 'Positive' Class : SUV
##
Provide a new entry to test the model
# One-row data frame describing a single new car, with one column per
# predictor used in the model formula.
new_car <- data.frame(
  ManHI = 1.94, ManBI = 2.21, IntI = 3.38,
  HVACi = 1.78, Safety = -7.19
)
predict result for new entry
# Predicted probability of the positive outcome for the new entry, then the
# same 0.5 cut-off as used for the test set to obtain a label.
logis_new <- predict(object = lfit, newdata = new_car, type = "response")
predicted_new <- ifelse(
  test = logis_new > 0.5, yes = "SUV", no = "Hatchback"
)
display result
# Show the predicted label for the new entry.
print(x = predicted_new)
## 1
## "Hatchback"