To demonstrate the choice methods, we began with the Sydney Transportation study in the forst part. Commuters can choose to go into city either by car or by train. The response in binary so we can choose the logistic regression model. In this study we know the time and cost of travel by car and by train. These are the explanatory varibales in this case. We use the linear combinations of four explanatory varibales(car time, car cost, Train time, train cost) to predict the consumer choice.
# Predicting Commuter Transportation Choices (R)
library(lattice) # multivariate data visualization
load("/Users/neha/Documents/marketing/MDS_chapter_2/MDS_Chapter_2/correlation_heat_map.RData")
sydney <- read.csv("/Users/neha/Documents/marketing/MDS_chapter_2/MDS_Chapter_2/sydney.csv")
names(sydney) <- c("Car_Time", "Car_Cost", "Train_Time", "Train_Cost", "Choice")
# specify and fit logistic regression model
sydney_model <- {Choice ~ Car_Time + Car_Cost + Train_Time + Train_Cost}
sydney_fit <- glm(sydney_model, family=binomial, data=sydney)
print(summary(sydney_fit))
##
## Call:
## glm(formula = sydney_model, family = binomial, data = sydney)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1291 -0.6151 -0.1621 0.6166 2.8337
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.444027 0.584971 -2.469 0.0136 *
## Car_Time 0.056507 0.010911 5.179 2.23e-07 ***
## Car_Cost 0.029842 0.006968 4.283 1.84e-05 ***
## Train_Time 0.014918 0.009482 1.573 0.1156
## Train_Cost -0.111293 0.016521 -6.737 1.62e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 458.36 on 332 degrees of freedom
## Residual deviance: 272.63 on 328 degrees of freedom
## AIC: 282.63
##
## Number of Fisher Scoring iterations: 5
print(anova(sydney_fit, test="Chisq"))
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Choice
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 332 458.36
## Car_Time 1 90.402 331 367.96 < 2.2e-16 ***
## Car_Cost 1 22.822 330 345.14 1.778e-06 ***
## Train_Time 1 7.563 329 337.57 0.005956 **
## Train_Cost 1 64.938 328 272.63 7.727e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# compute predicted probability of taking the train
sydney$Predict_Prob_TRAIN <- predict.glm(sydney_fit, type = "response")
sydney$Predict_Choice <- ifelse((sydney$Predict_Prob_TRAIN > 0.5), 2, 1)
sydney$Predict_Choice <- factor(sydney$Predict_Choice,
levels = c(1, 2), labels = c("CAR", "TRAIN"))
sydney$Predict_Choice <- ifelse((sydney$Predict_Prob_TRAIN > 0.5), 2, 1)
sydney$Predict_Choice <- factor(sydney$Predict_Choice,
levels = c(1, 2), labels = c("CAR", "TRAIN"))
confusion_matrix <- table(sydney$Predict_Choice, sydney$Choice)
cat("\nConfusion Matrix (rows = Predicted Choice, columns = Actual Choice\n")
##
## Confusion Matrix (rows = Predicted Choice, columns = Actual Choice
print(confusion_matrix)
##
## CAR TRAIN
## CAR 155 30
## TRAIN 28 120
predictive_accuracy <- (confusion_matrix[1,1] + confusion_matrix[2,2])/
sum(confusion_matrix)
cat("\nPercent Accuracy: ", round(predictive_accuracy * 100, digits = 1))
##
## Percent Accuracy: 82.6
#newdata <- subset(sydney, sydney$Choice == "CAR")
#View(newdata)
currently 150 out of 333 commuters (45 percent) use the train determine the tax required to have a comparable effect on train ticket price.
car_cost_vector <-
seq(min(sydney$Car_Cost), max(sydney$Car_Cost), length=1000)
beta.vector <- sydney_fit$coefficients
car_probability_vector <- numeric(1000)
for (i in 1:1000) {
X.vector <- c(1, mean(sydney$Car_Time), mean(sydney$Train_Time),
mean(sydney$Car_Cost), car_cost_vector[i])
car_probability_vector[i] <-
exp(X.vector %*% beta.vector)/
(1 + exp(X.vector %*% beta.vector))
}
index <- 1 # beginning index for search
while (car_probability_vector[index] > 0.55) index <- index + 1
Solution_Price <- car_cost_vector[index]
cat("\nSolution Price: ", Solution_Price)
##
## Solution Price: 34
Current_Mean_Price <- mean(sydney$Car_Cost)
Current_Mean_Price
## [1] 57.75375
Cents_tax <- ceiling(Current_Mean_Price - Solution_Price)
cat("\nTax imposed should be ", Cents_tax, "cents\n")
##
## Tax imposed should be 24 cents