Monty Hall Simulation

Problem

“Suppose you’re on a game show, and you’re given the choice of three doors: Behind one door is a car; behind the others, goats. You pick a door, say No. 1, and the host, who knows what’s behind the doors, opens another door, say No. 3, which has a goat. He then says to you, ‘Do you want to pick door No. 2?’ Is it to your advantage to switch your choice?”

Libraries

# load libraries
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.3

library(reshape2)

Process

N <- 10000 # Number of guesses simulated within each trial
T <- 100 # Number of trials. NOTE: this masks R's built-in `T` shorthand for TRUE for the rest of the script, so always spell out TRUE/FALSE below
 
# Draw the winning door for every guess of a single trial.
#
# Args:
#   n: number of guesses to simulate; defaults to the global N.
# Returns:
#   An integer vector of length n whose entries are sampled with
#   replacement from the doors 1, 2 and 3.
winning.door.func <- function(n = N) {
  # Return the draw directly: ending the body on an assignment would hand
  # the value back invisibly and leave an unused local behind.
  sample(1:3, n, replace = TRUE)
}
 
# Simulate every trial: each call to winning.door.func() supplies one
# column of N winning doors, giving an N x T matrix.
winning.door.matrix <- vapply(seq_len(T), function(trial) winning.door.func(),
                              integer(N))
# Door offered when switching: door 2 when the car is behind door 2,
# door 3 otherwise (the stay strategy below scores door 1 as the first pick).
switch.door.matrix <- ifelse(winning.door.matrix == 2, 2, 3)

## Option 1: STAY with first choice.
# Score every guess of every trial: 1 when the car is behind door 1
# (staying wins), 0 otherwise.
stay.winnings <- (winning.door.matrix == 1) + 0
# Turn each trial (column) into a running total of wins for nicer plots.
stay.winnings <- apply(stay.winnings, 2, cumsum)
 
## Option 2: Always switch
# Score every guess of every trial: 1 when the door we switch to is the
# winning door, 0 when it is a losing door.
switch.winnings <- (winning.door.matrix == switch.door.matrix) + 0
# Turn each trial (column) into a running total of wins for nicer plots.
switch.winnings <- apply(switch.winnings, 2, cumsum)
 
# Convert the cumulative switch winnings into a data.frame
# (one column per trial, one row per guess).
switch.data <- as.data.frame(switch.winnings)
# Store each row's index (as text) in a column named "category".
switch.data[["category"]] <- rownames(switch.data)
# Reshape from wide to long format so that every trial can be drawn
# in the same plot, with the trial column acting as the "group".
switch.data.molten <- melt(data = switch.data)

## Using category as id variables

# Melt the stay data the same way as the switch data.
stay.data <- as.data.frame(stay.winnings)
# Take the row index from the data.frame itself: stay.winnings is a plain
# matrix without row names, so row.names(stay.winnings) is NULL and
# assigning it would silently add no "category" column at all.
stay.data$category <- row.names(stay.data)
# Reshape from wide to long format: one row per guess per trial.
stay.data.molten <- melt(stay.data)

## No id variables; using all as measure variables

# Plot the cumulative winnings of every trial for both strategies.
# The x position is built explicitly for each molten row: the long-format
# data holds one row per guess per trial (trials stacked one after another),
# so a bare 1:N is shorter than the data, and ggplot2 requires aesthetics to
# be of length 1 or of the data's length.
ggplot(stay.data.molten,
       aes(rep_len(seq_len(N), length(value)), value, group = variable,
           color = "Stay")) +
 ylab("Winnings (cumulative)") + # y-label
 xlab("Guesses") + # x-label
 # draw the stay data as lines, one per trial
 geom_line(alpha = 0.3) +
 # add another layer to draw the switch data as lines, one per trial
 geom_line(data = switch.data.molten,
           aes(rep_len(seq_len(N), length(value)), value, group = variable,
               color = "Switch"),
           alpha = 0.3) +
 # create a manual legend keyed on the constant color labels above
 scale_color_manual("Choices", values = c("Switch" = "blue", "Stay" = "red"))

Outputs

# Row N holds each trial's final cumulative win count; columns 1..T are the
# trial columns (the trailing "category" column is excluded). Averaging over
# trials and dividing by N gives the mean win rate when always switching.
paste0("Mean probabability of switching: ",
       mean(unlist(switch.data[N, seq_len(T)])) / N)

## [1] "Mean probabability of switching: 0.667138"

# Row N holds each trial's final cumulative win count; only the T trial
# columns are selected. Averaging over trials and dividing by N gives the
# mean win rate when always staying.
paste0("Mean probabability of staying: ",
       mean(unlist(stay.data[N, seq_len(T)])) / N)

## [1] "Mean probabability of staying: 0.332862"

Bayesian context

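Let \(A\), \(B\) and \(C\) denote the events that the car is behind door No. 1 (the contestant's pick), No. 2 and No. 3 respectively, and let \(Open(C)\) be the event that the host opens door No. 3.

\[\begin{align*} \Pr(B \mid Open(C)) &= \frac{\Pr(Open(C) \mid B) \times \Pr(B)}{\Pr(Open(C))} && \text{by Bayes} \\ &= \frac{1 \times \Pr(B)}{\Pr(Open(C))} && \text{because $B \implies Open(C)$} \\ &= \frac{1/3}{\Pr(Open(C))} && \text{because each door is equally probable} \\ &= \frac{1}{3} \frac{1}{\Pr(Open(C) \mid A) \times \Pr(A) + \Pr(Open(C) \mid B) \times \Pr(B)} && \text{because $A$ or $B$ must be true when $C$ is open} \\ &= \frac{1}{\Pr(Open(C) \mid A) + \Pr(Open(C) \mid B)} && \text{because $\Pr(A) = \Pr(B) = 1/3$} \\ &= \frac{1}{1/2 + 1} = \frac{2}{3} && \text{because the host picks at random between doors 2 and 3 when $A$ holds, so $\Pr(Open(C) \mid A) = 1/2$} \end{align*}\]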

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.