#Class Exercise: Logistic Regression Analysis”
** This document provides an analysis of logistic regression for predicting the probability of tire purchase based on performance ratings (Wet and Noise). The steps include data import, cleaning, model creation, and probability estimation.**
# Load the required libraries
library(readxl) # For importing Excel files
library(Hmisc) # For correlation
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(pscl) # For McFadden R-squared
## Classes and Methods for R originally developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University (2002-2015),
## by and under the direction of Simon Jackman.
## hurdle and zeroinfl functions by Achim Zeileis.
library(pROC) # For AUC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
# Load the data
data <- read_excel("Class Exercise 15_TireRatings.xlsx") # Select the appropriate file interactively
# Clean the data
coll_data <- subset(data, select = -c(Tire)) # Drop the "Tire" column
# Preview and summarize the data
head(coll_data)
## # A tibble: 6 × 4
## Wet Noise Buy_Again Purchase
## <dbl> <dbl> <dbl> <dbl>
## 1 8 7.2 6.1 0
## 2 8 7.2 6.6 1
## 3 7.6 7.5 6.9 1
## 4 6.6 5.4 6.6 0
## 5 5.8 6.3 4 0
## 6 6.3 5.7 4.5 0
summary(coll_data)
## Wet Noise Buy_Again Purchase
## Min. :4.300 Min. :3.600 Min. :1.400 Min. :0.0000
## 1st Qu.:6.450 1st Qu.:6.000 1st Qu.:3.850 1st Qu.:0.0000
## Median :7.750 Median :7.100 Median :6.150 Median :0.0000
## Mean :7.315 Mean :6.903 Mean :5.657 Mean :0.4412
## 3rd Qu.:8.225 3rd Qu.:7.925 3rd Qu.:7.400 3rd Qu.:1.0000
## Max. :9.200 Max. :8.900 Max. :8.900 Max. :1.0000
# Compute correlation matrix
corr <- rcorr(as.matrix(coll_data))
corr
## Wet Noise Buy_Again Purchase
## Wet 1.00 0.76 0.91 0.74
## Noise 0.76 1.00 0.83 0.72
## Buy_Again 0.91 0.83 1.00 0.83
## Purchase 0.74 0.72 0.83 1.00
##
## n= 68
##
##
## P
## Wet Noise Buy_Again Purchase
## Wet 0 0 0
## Noise 0 0 0
## Buy_Again 0 0 0
## Purchase 0 0 0
# Fit logistic regression model
model <- glm(Purchase ~ Wet + Noise, data = data, family = binomial)
# Display the model summary
summary(model)
##
## Call:
## glm(formula = Purchase ~ Wet + Noise, family = binomial, data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -39.4982 12.4779 -3.165 0.00155 **
## Wet 3.3745 1.2641 2.670 0.00760 **
## Noise 1.8163 0.8312 2.185 0.02887 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 93.325 on 67 degrees of freedom
## Residual deviance: 27.530 on 65 degrees of freedom
## AIC: 33.53
##
## Number of Fisher Scoring iterations: 8
# Null model (for comparison)
null_model <- glm(Purchase ~ 1, data = data, family = binomial)
# Chi-squared test
anova(null_model, model, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Purchase ~ 1
## Model 2: Purchase ~ Wet + Noise
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 67 93.325
## 2 65 27.530 2 65.795 5.162e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# McFadden R-squared
pR2(model)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML r2CU
## -13.7649516 -46.6623284 65.7947536 0.7050093 0.6199946 0.8305269
# Extract coefficients
coefficients <- coef(model)
intercept <- coefficients[1]
wet_coef <- coefficients[2]
noise_coef <- coefficients[3]
# Define input values
wet_rating <- 8
noise_rating <- 8
# Calculate logit and probability
logit <- intercept + wet_coef * wet_rating + noise_coef * noise_rating
probability <- exp(logit) / (1 + exp(logit))
# Print the result
cat("Estimated Probability of Purchase for Wet = 8 and Noise = 8:", round(probability, 4))
## Estimated Probability of Purchase for Wet = 8 and Noise = 8: 0.8837
# Define new input values
wet_rating <- 7
noise_rating <- 7
# Calculate logit and probability
logit <- intercept + wet_coef * wet_rating + noise_coef * noise_rating
probability <- exp(logit) / (1 + exp(logit))
# Print the result
cat("Estimated Probability of Purchase for Wet = 7 and Noise = 7:", round(probability, 4))
## Estimated Probability of Purchase for Wet = 7 and Noise = 7: 0.0406