# Dependencies for this analysis. Only packages that are not yet installed are
# fetched, so re-running the script does not re-download them or require
# network access once they are present.
required_packages <- c("caret", "RSADBE", "rlang")
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}
library(RSADBE)
library(caret)
library(rlang)
# Load the Gasoline data set (RSADBE is attached above) and keep a working
# copy under a descriptive name.
data(list = "Gasoline")
gasolineData <- Gasoline
# Dimensions as rows, columns.
c(nrow(gasolineData), ncol(gasolineData))
## [1] 25 12
# Show the first record to see the coded column names (y, x1 ... x11).
head(x = gasolineData, n = 1L)
## y x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11
## Apollo 18.9 350 165 260 0.8889 0.7191 4 3 200.3 69.9 3910 A
# Per-column summary statistics; the recorded output reports one missing value.
summary(object = gasolineData)
## y x1 x2 x3
## Min. :11.20 Min. : 85.3 Min. : 70.0 Min. : 81.0
## 1st Qu.:16.50 1st Qu.:225.0 1st Qu.:105.0 1st Qu.:164.0
## Median :19.70 Median :318.0 Median :140.0 Median :231.5
## Mean :20.62 Mean :287.6 Mean :137.7 Mean :215.0
## 3rd Qu.:22.12 3rd Qu.:351.0 3rd Qu.:165.0 3rd Qu.:256.2
## Max. :36.50 Max. :500.0 Max. :223.0 Max. :366.0
## NA's :1
## x4 x5 x6 x7
## Min. :0.8837 Min. :0.7101 Min. :1.0 Min. :3.00
## 1st Qu.:0.8889 1st Qu.:0.7319 1st Qu.:2.0 1st Qu.:3.00
## Median :0.8913 Median :0.7500 Median :2.0 Median :3.00
## Mean :0.8920 Mean :0.7523 Mean :2.6 Mean :3.36
## 3rd Qu.:0.8947 3rd Qu.:0.7630 3rd Qu.:4.0 3rd Qu.:3.00
## Max. :0.9000 Max. :0.8113 Max. :4.0 Max. :5.00
##
## x8 x9 x10 x11
## Min. :155.7 Min. :61.80 Min. :1905 A:18
## 1st Qu.:171.5 1st Qu.:65.40 1st Qu.:3020 M: 7
## Median :195.4 Median :72.20 Median :3850
## Mean :191.8 Mean :71.24 Mean :3612
## 3rd Qu.:200.3 3rd Qu.:76.30 3rd Qu.:4215
## Max. :231.0 Max. :79.80 Max. :5430
##
# List the raw (coded) column names of the data frame.
colnames(gasolineData)
## [1] "y" "x1" "x2" "x3" "x4" "x5" "x6" "x7" "x8" "x9" "x10"
## [12] "x11"
As you can see, there are 25 rows and 12 columns in our dataset, and the columns do not have descriptive names. Let's replace the column names with descriptive ones.
# Replace the coded column names with descriptive labels.
# The assignment is positional: the k-th label renames the k-th column.
colnames(gasolineData) <- c(
  "Miles Per Gallon",               # y
  "Displacement",                   # x1
  "Horsepower",                     # x2
  "Torque",                         # x3
  "Compression Ratio",              # x4
  "Rear Axle Ratio",                # x5
  "Carburetor",                     # x6
  "Number Of Transmission Speeds",  # x7
  "Overall Length",                 # x8
  "Width Inches",                   # x9
  "Weight Pounds",                  # x10
  "TransmissionType"                # x11
)
# Confirm that the new names are in place.
colnames(gasolineData)
## [1] "Miles Per Gallon" "Displacement"
## [3] "Horsepower" "Torque"
## [5] "Compression Ratio" "Rear Axle Ratio"
## [7] "Carburetor" "Number Of Transmission Speeds"
## [9] "Overall Length" "Width Inches"
## [11] "Weight Pounds" "TransmissionType"
Now our dataset looks more informative.
# Fix the RNG seed so the random train/test partition, and every number
# derived from it, is reproducible between runs.
set.seed(123)
# 70/30 split on the outcome column; list = FALSE returns the selected
# training-row indices as a matrix that can be used directly for subsetting.
splitData <- createDataPartition(
  gasolineData$TransmissionType,
  p = 0.70,
  list = FALSE
)
trainingData <- gasolineData[splitData, ]
testingData <- gasolineData[-splitData, ]
# Logistic regression of transmission type on all other columns. The response
# is named as a column and resolved through `data`, instead of being pulled
# out of the data frame with `$` inside the formula.
logisticsModel <- glm(
  TransmissionType ~ .,
  family = "binomial",
  data = trainingData
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# NOTE(review): the warning above and the recorded summary output that follows
# were captured from an earlier, unseeded run; exact values will differ.
# NOTE(review): the recorded fit has a residual deviance of ~0 with huge
# standard errors, i.e. the eleven predictors separate the classes perfectly
# on so few training rows -- the coefficients should not be interpreted.
summary(logisticsModel)
##
## Call:
## glm(formula = trainingData$TransmissionType ~ ., family = "binomial",
## data = trainingData)
##
## Deviance Residuals:
## Nova Monarch Jenson Conv. Skyhawk Scirocco
## -2.565e-06 -2.431e-06 -5.158e-06 -3.055e-06 4.616e-06
## Corolla SR-5 Camaro Capri II Granada Eldorado
## 2.110e-08 -5.799e-06 7.136e-06 -7.939e-06 -2.753e-06
## Starfire Cordoba Corolla E-S Mark IV Celica GT
## -5.431e-06 -5.084e-06 6.221e-06 -2.990e-06 2.110e-08
## Cougar Corvette
## -2.110e-08 -5.256e-07
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 9.268e+02 7.001e+07 0 1
## `Miles Per Gallon` 3.659e-01 5.313e+04 0 1
## Displacement -3.405e-01 2.270e+04 0 1
## Horsepower -3.212e-01 2.936e+04 0 1
## Torque 5.329e-01 4.652e+04 0 1
## `Compression Ratio` -9.742e+02 7.961e+07 0 1
## `Rear Axle Ratio` -3.144e+02 1.423e+07 0 1
## Carburetor 1.917e+00 1.582e+05 0 1
## `Number Of Transmission Speeds` 4.164e+01 8.513e+05 0 1
## `Overall Length` -2.192e+00 5.352e+04 0 1
## `Width Inches` 5.391e+00 8.802e+04 0 1
## `Weight Pounds` 2.103e-02 1.581e+03 0 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2.0597e+01 on 16 degrees of freedom
## Residual deviance: 3.2815e-10 on 5 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 24
##
## Number of Fisher Scoring iterations: 24
# Score the held-out rows; type = "response" yields fitted probabilities
# rather than values on the link (log-odds) scale.
pred <- predict(
  object = logisticsModel,
  newdata = testingData,
  type = "response"
)
Now we will see how accurate our model is.
# Convert predicted probabilities into class labels and compute the share of
# test rows that were classified correctly.
y_act <- testingData$TransmissionType
# A binomial glm treats the first factor level as "failure" and models the
# probability of the other level, so probabilities above 0.5 map to the
# second level ("M" for the levels A, M of this data set).
class_levels <- levels(y_act)
y_pred_num <- ifelse(pred > 0.5, class_levels[2], class_levels[1])
y_pred <- factor(y_pred_num, levels = class_levels)
# A test row with a missing predictor gets an NA prediction; without
# na.rm = TRUE a single such row would turn the whole accuracy into NA.
mean(y_pred == y_act, na.rm = TRUE)
## [1] 0.7142857
So, as you can see, our model is 71.4% accurate on the held-out test rows (5 of 7 classified correctly).