# Set chunk options
knitr::opts_chunk$set(echo = T, comment = NA, warning = F, message = F)
# Set document options
options(scipen = 999)
# List necessary packages
packages <- c('dplyr', 'plotly') # dplyr for data manipulation | plotly for visualization
# Install packages if not existing in local R
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
install.packages(packages[!installed_packages])
}
# Load libraries
invisible(lapply(packages, library, character.only = TRUE))
####################
### Pseudo code
####################
# Coin (nr_tosses, nr_head, S)
#
# Input:
# S: integer defining the number of experiments
# nr_tosses: integer defining the number of coin tosses
# nr_head: integer defining the number of heads
#
# Output: proportion of trials with exactly nr_head times heads
#
# Set out_vec a vector of length S
# For s in 1:S
# Sample nr_tosses from (“heads”,”tails”) with replacement
# Count H, the number of occurrences of heads
# If H == nr_head then
# Out_vec[s] = 1
# Else
# Out_vec[s] = 0
# Return sum(Out_vec)/ S
####################
coin_tossing <- function(number_coins, number_heads, number_tossing, seed = 42) {
########################################
# Argument description
# *********************
# Number of coin tosses: number_coins
# Number of heads: number_heads
# Number of experiments: number_tossing
########################################
# Set seed
set.seed(seed)
# Create initial vector of tosses
tosses <- rep(0, number_tossing)
# Toss the coin and count the heads
tosses <- sapply(tosses, function(i) {sum(rbinom(n = number_coins, size = 1, prob = 0.5))})
#print(tosses)
# Check if condition is satisfied
check <- case_when(tosses == number_heads ~ 1, TRUE ~ 0)
#print(check)
# Calculate probability of output satisfying the condition
output <- sum(check)/number_tossing
#print(output)
# Return output of function
return(output)
}
#coin_tossing(n = 2, k = 1, number_tossing = 10)
coin_tossing(number_coins = 1000,
number_heads = 521,
number_tossing = 10e3) # S should be smaller than 10e6 or the process will take so long
[1] 0.011
Funtion: monty_hall(number_doors, switch_decision, number_games)
Input:
number_doors: number of doors in the game
switch_decision: logical value as decision to switch or not
number_games: number of games played
Output: probability of winning (a proportion of times of choosing winning door over number of games played)
Steps:
Initiate with the vector of zeroes (games_played) with length = number_games
Repeat the following for each element i of games_played:
Set series of doors (1:number_doors)
Sample the original chosen door
Sample the winning door
If the player switches the door (switch_decision = TRUE), check condition of winning and mark value for games_played[i]:
If the original chosen door is the winning door, then mark 0, else mark 1
Else if the player does not switch, check condition of winning:
If the original chosen door is the winning door, then mark 1, else mark 0
Calculate probability of winning = sum(games_played)/number_games
monty_hall <- function(number_doors = 3, switch_decision, number_games, seed = 12) {
########################################
# Argument description
# *********************
# number_doors: number of doors in the game, default as 3
# switch_decision: decision to switch or not
# number_games: number of games played
########################################
# Set seed
set.seed(seed)
# Create initial vector of games played
games_played <- rep(0, number_games)
#cat('games_played =', games_played, '\n')
# Set the doors
doors <- 1:number_doors
# Sample the chosen door
chosen_door <- sapply(games_played, function(i) {sample(x = doors, size = 1)})
#cat('chosen_door =', chosen_door, '\n')
# Sample the winning door
winning_door <- sapply(games_played, function(i) {sample(x = doors, size = 1)})
#cat('winning_door =', winning_door, '\n')
# Check if condition is satisfied
if (switch_decision) {
check <- case_when(chosen_door == winning_door ~ 0, TRUE ~ 1)
} else {
check <- case_when(chosen_door == winning_door ~ 1, TRUE ~ 0)
}
#cat('switch =', switch_decision, '- check =', check, '\n')
# Calculate probability of outputs satisfying the condition
output <- sum(check)/number_games
#cat('sum(check) =', sum(check), '- number_games =', number_games, '\n')
# Return output of function
return(output)
}
monty_hall(switch_decision = T, number_games = 100)
[1] 0.65
monty_hall(switch_decision = F, number_games = 100)
[1] 0.35
games <- 1:100
switch_results <- sapply(games, function(n) {monty_hall(number_doors = 3,
switch_decision = TRUE,
number_games = n)})
stay_results <- sapply(games, function(n) {monty_hall(number_doors = 3,
switch_decision = FALSE,
number_games = n)})
results <- data.frame(n = games, switch = switch_results, stay = stay_results)
line_plot <- results %>%
plot_ly(type = 'scatter', mode = 'lines',
x = ~n,
y = ~switch, name = 'Switch', line = list(color = '#ff9800')) %>%
add_trace(y = ~stay, name = 'Stay', line = list(color = '#27aee1')) %>%
layout(title = 'Winning probability',
xaxis = list(title = 'Number of games played', showgrid = T),
yaxis = list(title = 'Probability', showgrid = T)
)
line_plot
Conclusion: The best strategy is to switch doors.
monty_hall_variant2 <- function(number_doors = 4, switch_decision = T, number_games, seed = 123) {
########################################
# Argument description
# *********************
# number_doors: number of doors in the game, default as 4
# switch_decision: decision to switch or not, default as TRUE
# number_games: number of games played
########################################
# Set seed
set.seed(seed)
# Create initial vector of games played
games_played <- rep(0, number_games)
#cat('games_played =', games_played, '\n')
# Set the doors
doors <- 1:number_doors
# Sample the chosen door
chosen_door <- sapply(games_played, function(i) {sample(x = doors, size = 1)})
#cat('chosen_door =', chosen_door, '\n')
# Sample the winning door
winning_door <- sapply(games_played, function(i) {sample(x = doors, size = 1)})
#cat('winning_door =', winning_door, '\n')
# Sample the new door (switched)
switch_door <- sapply(chosen_door, function(i) {sample(x = doors[-i], size = 1)})
#cat('switch_door =', switch_door, '\n')
# Check if condition is satisfied
if (switch_decision) {
check <- case_when(switch_door == winning_door ~ 1, TRUE ~ 0)
} else {
check <- case_when(chosen_door == winning_door ~ 1, TRUE ~ 0)
}
#cat('switch =', switch_decision, '- check =', check, '\n')
# Calculate probability of outputs satisfying the condition
output <- sum(check)/number_games
#cat('sum(check) =', sum(check), '- number_games =', number_games, '\n')
# Return output of function
return(output)
}
monty_hall_variant2(number_games = 5)
[1] 0.6
monty_hall_variant2(number_games = 10)
[1] 0.4
monty_hall_variant2(number_games = 100)
[1] 0.17
monty_hall_variant2(number_games = 1000)
[1] 0.254
monty_hall_variant2(number_games = 1000)
[1] 0.254
monty_hall_variant2(number_games = 5000)
[1] 0.2518
games <- 1:200
switch_results <- sapply(games, function(n) monty_hall_variant2(number_games = n))
results <- data.frame(n = games, switch = switch_results)
line_plot <- results %>%
plot_ly(type = 'scatter', mode = 'lines',
x = ~n,
y = ~switch, name = 'Switch', line = list(color = '#009933')) %>%
layout(title = 'Winning probability',
xaxis = list(title = 'Number of games played', showgrid = T),
yaxis = list(title = 'Probability', showgrid = T)
)
line_plot
birth_day_exp <- function(n, number_groups, seed = 51) {
########################################
# Argument description
# *********************
# n: number of people in the group
# number_groups: number of groups to check
########################################
# Set seed
set.seed(seed)
# Create initial vector of group
group <- rep(0, number_groups)
#cat('group =', group, '\n')
# Set the series of birthdays
birth_days <- 1:365
#cat('birth_days =', birth_days, '\n')
# Sample the birthday and count the unique birthdays
sampling <- sapply(group, function(i) length(unique(sample(x = birth_days, size = n, replace = T))) )
#cat('sampling =', sampling, '\n')
# Check if condition is satisfied
check <- case_when(sampling < n ~ 1, TRUE ~ 0)
#cat('check =', check, '\n')
# Calculate probability of outputs satisfying the condition
output <- sum(check)/number_groups
#cat('sum(check) =', sum(check), '- number_groups =', number_groups, '\n')
# Return output of function
return(output)
}
birth_day_exp(n = 15, number_groups = 2000)
[1] 0.2535
birth_day_exp(n = 15, number_groups = 4000)
[1] 0.262
birth_day_exp(n = 40, number_groups = 4000)
[1] 0.8905
class_sizes <- 5:200
same_birthday <- sapply(class_sizes, function(n) birth_day_exp(n = n, number_groups = 1000))
results <- data.frame(n = class_sizes, same = same_birthday)
line_plot <- results %>%
plot_ly(type = 'scatter', mode = 'lines',
x = ~n,
y = ~same, name = 'Same Birthday', line = list(color = '#009933')) %>%
layout(title = 'Probability of at least 2 students having the same birthday',
xaxis = list(title = 'Class size', showgrid = T),
yaxis = list(title = 'Probability', showgrid = T)
)
line_plot
\[ \dfrac{365 \times 364 \times 363... \times 326}{365^{40}}=0.1087682 \]
\[ 1 - 0.1087682 = 0.8912318 \]
Comparison: The results obtained from analytical solution and simulation are the same (\(\approx 0.89\)).