#Class Exercise: Logistic Regression Analysis”

Introduction

** This document provides an analysis of logistic regression for predicting the probability of tire purchase based on performance ratings (Wet and Noise). The steps include data import, cleaning, model creation, and probability estimation.**

# Load the required libraries
library(readxl)  # For importing Excel files
library(Hmisc)   # For correlation
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(pscl)    # For McFadden R-squared
## Classes and Methods for R originally developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University (2002-2015),
## by and under the direction of Simon Jackman.
## hurdle and zeroinfl functions by Achim Zeileis.
library(pROC)    # For AUC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# Load the data
data <- read_excel("Class Exercise 15_TireRatings.xlsx") # Select the appropriate file interactively

# Clean the data
coll_data <- subset(data, select = -c(Tire)) # Drop the "Tire" column

# Preview and summarize the data
head(coll_data)
## # A tibble: 6 × 4
##     Wet Noise Buy_Again Purchase
##   <dbl> <dbl>     <dbl>    <dbl>
## 1   8     7.2       6.1        0
## 2   8     7.2       6.6        1
## 3   7.6   7.5       6.9        1
## 4   6.6   5.4       6.6        0
## 5   5.8   6.3       4          0
## 6   6.3   5.7       4.5        0
summary(coll_data)
##       Wet            Noise         Buy_Again        Purchase     
##  Min.   :4.300   Min.   :3.600   Min.   :1.400   Min.   :0.0000  
##  1st Qu.:6.450   1st Qu.:6.000   1st Qu.:3.850   1st Qu.:0.0000  
##  Median :7.750   Median :7.100   Median :6.150   Median :0.0000  
##  Mean   :7.315   Mean   :6.903   Mean   :5.657   Mean   :0.4412  
##  3rd Qu.:8.225   3rd Qu.:7.925   3rd Qu.:7.400   3rd Qu.:1.0000  
##  Max.   :9.200   Max.   :8.900   Max.   :8.900   Max.   :1.0000
# Compute correlation matrix
corr <- rcorr(as.matrix(coll_data))
corr
##            Wet Noise Buy_Again Purchase
## Wet       1.00  0.76      0.91     0.74
## Noise     0.76  1.00      0.83     0.72
## Buy_Again 0.91  0.83      1.00     0.83
## Purchase  0.74  0.72      0.83     1.00
## 
## n= 68 
## 
## 
## P
##           Wet Noise Buy_Again Purchase
## Wet            0     0         0      
## Noise      0         0         0      
## Buy_Again  0   0               0      
## Purchase   0   0     0
# Fit logistic regression model
model <- glm(Purchase ~ Wet + Noise, data = data, family = binomial)

# Display the model summary
summary(model)
## 
## Call:
## glm(formula = Purchase ~ Wet + Noise, family = binomial, data = data)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -39.4982    12.4779  -3.165  0.00155 **
## Wet           3.3745     1.2641   2.670  0.00760 **
## Noise         1.8163     0.8312   2.185  0.02887 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 93.325  on 67  degrees of freedom
## Residual deviance: 27.530  on 65  degrees of freedom
## AIC: 33.53
## 
## Number of Fisher Scoring iterations: 8
# Null model (for comparison)
null_model <- glm(Purchase ~ 1, data = data, family = binomial)

# Chi-squared test
anova(null_model, model, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: Purchase ~ 1
## Model 2: Purchase ~ Wet + Noise
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        67     93.325                          
## 2        65     27.530  2   65.795 5.162e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# McFadden R-squared
pR2(model)
## fitting null model for pseudo-r2
##         llh     llhNull          G2    McFadden        r2ML        r2CU 
## -13.7649516 -46.6623284  65.7947536   0.7050093   0.6199946   0.8305269
# Extract coefficients
coefficients <- coef(model)
intercept <- coefficients[1]
wet_coef <- coefficients[2]
noise_coef <- coefficients[3]

# Define input values
wet_rating <- 8
noise_rating <- 8

# Calculate logit and probability
logit <- intercept + wet_coef * wet_rating + noise_coef * noise_rating
probability <- exp(logit) / (1 + exp(logit))

# Print the result
cat("Estimated Probability of Purchase for Wet = 8 and Noise = 8:", round(probability, 4))
## Estimated Probability of Purchase for Wet = 8 and Noise = 8: 0.8837
# Define new input values
wet_rating <- 7
noise_rating <- 7

# Calculate logit and probability
logit <- intercept + wet_coef * wet_rating + noise_coef * noise_rating
probability <- exp(logit) / (1 + exp(logit))

# Print the result
cat("Estimated Probability of Purchase for Wet = 7 and Noise = 7:", round(probability, 4))
## Estimated Probability of Purchase for Wet = 7 and Noise = 7: 0.0406