To demonstrate the choice methods, we begin with the Sydney Transportation study. Commuters can choose to go into city either by car or by train. The response in binary so we can choose the logistic regression model. In this study we know the time and cost of travel by car and by train. These are the explanatory varibales in this case. We use the linear combinations of four explanatory varibales(car time, car cost, Train time, train cost) to predict the consumer choice.

# Predicting Commuter Transportation Choices (R)

library(lattice)  # multivariate data visualization
names(sydney) <- c("Car_Time", "Car_Cost", "Train_Time", "Train_Cost", "Choice")
plotting_data_frame <- sydney[, 1:4]

Scatter plot matrix with simple linear regression. Models and lowess smooth fits for variable pairs

pairs(plotting_data_frame,
    panel = function(x, y) {
        points(x, y, cex = 0.5)
        abline(lm(y ~ x), lty = "solid", col = "red")
        lines(lowess(x, y))
        }
    )

correlation heat map for the explanatory variables

sydney_cormat <- 
    cor(sydney[, c("Car_Time", "Car_Cost", "Train_Time", "Train_Cost")])
correlation_heat_map(sydney_cormat)

Fit Logistic Model

# specify and fit logistic regression model
sydney_model <- {Choice ~ Car_Time + Car_Cost + Train_Time + Train_Cost}
sydney_fit <- glm(sydney_model, family=binomial, data=sydney)
print(summary(sydney_fit))
## 
## Call:
## glm(formula = sydney_model, family = binomial, data = sydney)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1291  -0.6151  -0.1621   0.6166   2.8337  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.444027   0.584971  -2.469   0.0136 *  
## Car_Time     0.056507   0.010911   5.179 2.23e-07 ***
## Car_Cost     0.029842   0.006968   4.283 1.84e-05 ***
## Train_Time   0.014918   0.009482   1.573   0.1156    
## Train_Cost  -0.111293   0.016521  -6.737 1.62e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 458.36  on 332  degrees of freedom
## Residual deviance: 272.63  on 328  degrees of freedom
## AIC: 282.63
## 
## Number of Fisher Scoring iterations: 5
print(anova(sydney_fit, test="Chisq"))
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Choice
## 
## Terms added sequentially (first to last)
## 
## 
##            Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                         332     458.36              
## Car_Time    1   90.402       331     367.96 < 2.2e-16 ***
## Car_Cost    1   22.822       330     345.14 1.778e-06 ***
## Train_Time  1    7.563       329     337.57  0.005956 ** 
## Train_Cost  1   64.938       328     272.63 7.727e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# compute predicted probability of taking the train 
sydney$Predict_Prob_TRAIN <- predict.glm(sydney_fit, type = "response") 
#pdf(file = "fig_predicting_choice_density_evaluation.pdf", 
#    width = 8.5, height = 8.5)

plotting_object <- densityplot( ~ Predict_Prob_TRAIN | Choice, 
               data = sydney, 
               layout = c(1,2), aspect=1, col = "darkblue", 
               plot.points = "rug",
               strip=function(...) strip.default(..., style=1),
               xlab="Predicted Probability of Taking Train") 
print(plotting_object) 

predicted car-or-train choice using 0.5 cut-off

sydney$Predict_Choice <- ifelse((sydney$Predict_Prob_TRAIN > 0.6), 2, 1)
sydney$Predict_Choice <- factor(sydney$Predict_Choice,
    levels = c(1, 2), labels = c("CAR", "TRAIN"))

Creating confusion matrix

confusion_matrix <- table(sydney$Predict_Choice, sydney$Choice)
cat("\nConfusion Matrix (rows = Predicted Choice, columns = Actual Choice\n")
## 
## Confusion Matrix (rows = Predicted Choice, columns = Actual Choice
print(confusion_matrix)
##        
##         CAR TRAIN
##   CAR   163    37
##   TRAIN  20   113

Accuracy

predictive_accuracy <- (confusion_matrix[1,1] + confusion_matrix[2,2])/
                        sum(confusion_matrix)                                              
cat("\nPercent Accuracy: ", round(predictive_accuracy * 100, digits = 1))
## 
## Percent Accuracy:  82.9

How much lower would train ticket prices have to be to increase public transportation usage (TRAIN) by 10 percent?

train_cost_vector <- 
     seq(min(sydney$Train_Cost), max(sydney$Train_Cost), length=1000)
beta.vector <- sydney_fit$coefficients
train_probability_vector <- numeric(1000)
for (i in 1:1000) {
       X.vector <- c(1, mean(sydney$Car_Time), mean(sydney$Train_Time),
           mean(sydney$Car_Cost), train_cost_vector[i])
       train_probability_vector[i] <- 
           exp(X.vector %*% beta.vector)/
               (1 + exp(X.vector %*% beta.vector))
       } 

Currently 150 out of 333 commuters (45 percent) use the train.
Determine price required for 55 percent of commuters to take the train this is the desired quota set by public administrators

index <- 1  # beginning index for search
while (train_probability_vector[index] > 0.60) index <- index + 1
Solution_Price <- train_cost_vector[index]
cat("\nSolution Price: ", Solution_Price)
## 
## Solution Price:  32.04505
Current_Mean_Price <- mean(sydney$Train_Cost)

How much do administrators need to lower prices? use greatest integer function to ensure quota is exceeded

Cents_Lower <- ceiling(Current_Mean_Price - Solution_Price)
cat("\nLower prices by ", Cents_Lower, "cents\n")
## 
## Lower prices by  7 cents
plot(train_cost_vector, train_probability_vector,
     type="l",ylim=c(0,1.0), las = 1, 
     xlab="Cost of Taking the Train (in cents)",
     ylab="Estimated Probability of Taking the Train")