# Class Exercise 15

# Question 1

# Step 1: Install (only if missing) and load the readxl package.
# requireNamespace() checks whether the package is available without
# attaching it, so it is not downloaded again on every run of the script.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Step 2: Read the data from an Excel file.
# file.choose() opens a file picker, so this step needs an interactive session.
data <- read_excel(file.choose())

# Step 3: Ensure the data is in the correct format.
data <- as.data.frame(data)  # Ensure the data is in a data frame format
head(data)                   # Display the first few rows of the dataset
colnames(data)               # Check column names to verify correctness

# Step 4: Rename columns if the column names are incorrect.
expected_names <- c("Tire", "Wet", "Noise", "Buy_Again", "Purchase")
if (!identical(colnames(data), expected_names)) {
  # The rename is positional, so refuse to relabel a sheet whose column count
  # does not match instead of silently producing wrong or NA column names.
  stopifnot(ncol(data) == length(expected_names))
  colnames(data) <- expected_names
}

# Step 5: Build a logistic regression model of Purchase on Wet and Noise.
# The link name must be written with straight ASCII quotes; typographic
# (curly) quotes are not valid string delimiters in R and fail to parse.
model <- glm(
  Purchase ~ Wet + Noise,
  data = data,
  family = binomial(link = "logit")
)

# Step 6: View the model summary.
summary(model)

# Step 7: Extract coefficients from the model.
# Selecting by name keeps each value tied to its term regardless of position.
coefficients <- coef(model)
b0 <- coefficients["(Intercept)"]  # Intercept
b1 <- coefficients["Wet"]          # Coefficient for Wet
b2 <- coefficients["Noise"]        # Coefficient for Noise

# Step 8: Print the results.
cat("b0 (Intercept):", b0, "\n")
cat("b1 (Wet):", b1, "\n")
cat("b2 (Noise):", b2, "\n")

# Question 2

# Step 1: View the model summary.
# The summary is stored once and reused below rather than recomputed.
model_summary <- summary(model)
model_summary

# Step 2: Extract p-values for each coefficient.
# The fourth column of the coefficient table holds the p-values.
p_values <- model_summary$coefficients[, 4]

# Step 3: Determine if coefficients are significant (alpha = 0.05).
significant <- p_values < 0.05

# Step 4: Label the p-values for better readability.
names(p_values) <- c("Intercept", "Wet", "Noise")
# Give the significance flags the same labels so both printouts agree.
names(significant) <- names(p_values)
cat("P-values:\n")
print(p_values)

# Step 5: Display significance of each variable.
cat("\nSignificance (alpha = 0.05):\n")
print(significant)

# Step 6: Check if all independent variables are significant.
# isTRUE() treats a missing (NA) result as "not significant" instead of
# letting `if` stop with an error.
if (isTRUE(significant[["Wet"]] && significant[["Noise"]])) {
  cat("\nBoth independent variables (Wet and Noise) are significant.\n")
} else {
  cat("\nNot all independent variables are significant.\n")
}

# Question 3

# Step 1: Null deviance, i.e. the deviance of the intercept-only model
# (no independent variables).
null_deviance <- model[["null.deviance"]]

# Step 2: Residual deviance, i.e. the deviance of the model that includes
# the predictors (Wet and Noise).
residual_deviance <- model[["deviance"]]

# Step 3: McFadden R-Squared = 1 - (residual deviance / null deviance).
mcfadden_r2 <- 1 - (residual_deviance / null_deviance)

# Step 4: Print the result, rounded to two decimal places.
cat("McFadden R-Squared:", round(mcfadden_r2, digits = 2), "\n")

# Question 4

# Step 1: Define the values for Wet and Noise.
wet <- 8    # Wet performance rating
noise <- 8  # Noise performance rating

# Step 2: Extract the model coefficients.
b0 <- coefficients[1]  # Intercept
b1 <- coefficients[2]  # Coefficient for Wet
b2 <- coefficients[3]  # Coefficient for Noise

# Step 3: Calculate the Logit value (the linear predictor).
logit <- b0 + b1 * wet + b2 * noise

# Step 4: Convert the Logit value to a probability.
# plogis() is the inverse logit, 1 / (1 + exp(-x)). The literal form
# exp(x) / (1 + exp(x)) turns into Inf / Inf = NaN once exp(x) overflows,
# whereas plogis() returns 1 in that case.
probability <- plogis(logit)

# Step 5: Convert the probability to a percentage with two decimal places.
percentage <- round(probability * 100, 2)

# Step 6: Print the result.
cat("Estimated Probability (%):", percentage, "\n")

# Question 5

# Step 1: Define the values for Wet and Noise.
wet <- 7    # Wet performance rating
noise <- 7  # Noise performance rating

# Step 2: Calculate the Logit value (the linear predictor).
# b0, b1 and b2 are the intercept and the Wet/Noise coefficients extracted
# earlier in the script.
logit <- b0 + b1 * wet + b2 * noise

# Step 3: Convert the Logit value to a probability.
# plogis() is the inverse logit, 1 / (1 + exp(-x)). The literal form
# exp(x) / (1 + exp(x)) turns into Inf / Inf = NaN once exp(x) overflows,
# whereas plogis() returns 1 in that case.
probability <- plogis(logit)

# Step 4: Convert the probability to a percentage with two decimal places.
percentage <- round(probability * 100, 2)

# Step 5: Print the result.
cat("Estimated Probability (%):", percentage, "\n")