Authors@R: person(“Annice”, “Najafi”, email = “annicenajafi27@gmail.com”)
options(warn=-1)
References:
Ross, S. M. (2014). Introduction to stochastic dynamic programming. Academic press. Summer 2024
This is an R implementation of the gambling model from Chapter 1, section 2 (“A Gambling Model”) of the “Introduction to Stochastic Dynamic Programming” book by Sheldon Ross. The following text is taken directly from the textbook.
“At each play of the game, a gambler can bet any nonnegative amount up to his present fortune and will either win or lose that amount with probabilities \(p\) and \(q = 1 - p\), respectively. The gambler is allowed to make \(n\) bets, and his objective is to maximize the expectation of the logarithm of his final fortune. What strategy achieves this end?”
# Load libraries
library(ggplot2)
library(shiny)
#Define V_n
#' Recursive function returns the maximal expected return
#'
#' @param n number of bets
#' @param x present fortune
#' @param p is the probability of winning, q is the probability of losing
#' hence p = 1-q
#' @param alpha is the percentage of fortune we are betting
#'
#' @return expected return value
#' @export
V_n <- function(n, x, p, alpha) {
if (n == 0) {
return(log(x))
}
V_previous <- function(x) {
V_n(n - 1, x, p, alpha)
}
expected_reward <- p * V_previous(x * (1 + alpha)) + (1-p) * V_previous(x * (1 - alpha))
return(expected_reward)
}
#Simulate problem
#' Simulate the case where a gambler is betting a percentage of his fortune
#' and
#' @param n number of bets
#' @param initial_x initial fortune of the gambler
#' @param p probability of winning in each stage
#' @param alpha percentage the gambler bets
#'
#' @return log of fortune
#' @export
simulate_problem <- function(n, initial_x, p, alpha) {
x <- initial_x
for (i in 1:n) {
if (rbinom(1, 1, p) == 0) { #it's similar to flipping a coin with p prob
#of getting heads
x <- x * (1 - alpha) #you either get tails and lose
} else {
x <- x * (1 + alpha) #or get heads and win
}
}
return(log(x)) # return log of fortune
}
# Params
initial_fortune <- 1000 # Initial fortune
p <- 0.6 # Probability of winning
q <- 1- p #probability of losing
n <- 10 # Number of bets
alpha <- 0.3 #What fraction of his fortune is he betting
# Calculate V_n(x)
res_vector <- c() #store results
#Go over all values
for(alpha in seq(0, p, 0.01)){
result <- V_n(n, initial_fortune, p, alpha)
res_vector <- c(res_vector, result)
}
reward.df <- data.frame(alpha = seq(0, p, 0.01), reward = res_vector)
# Simulate
num_simulations <- 10000
alpha_values <- seq(0, p, 0.01)
expected_rewards_sim <- numeric(length(alpha_values))
for (j in seq_along(alpha_values)) {
alpha <- alpha_values[j]
fortunes <- numeric(num_simulations)
for (i in 1:num_simulations) {
fortunes[i] <- simulate_problem(n, initial_fortune, p, alpha)
}
expected_rewards_sim[j] <- mean(fortunes)
}
reward.df_sim <- data.frame(alpha = alpha_values, reward = expected_rewards_sim)
ggplot() +
geom_point(data = reward.df_sim, aes(x = alpha, y = reward), stroke = 2, color = "#D96098", shape = 8) +
geom_line(data = reward.df, aes(x = alpha, y = reward), size = 2, color = "#F7DBF0") +
labs(x = "Alpha", y = "Expected Reward", title = "Gambling model") +
geom_vline(xintercept = (p - q), linetype="dashed")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "A")
p <- 0.3 # Probability of winning
q <- 1- p #probability of losing
# Calculate V_n(x)
res_vector <- c() #store results
#Go over all values
for(alpha in seq(0, p, 0.01)){
result <- V_n(n, initial_fortune, p, alpha)
res_vector <- c(res_vector, result)
}
reward.df <- data.frame(alpha = seq(0, p, 0.01), reward = res_vector)
# Simulate
num_simulations <- 10000
alpha_values <- seq(0, p, 0.01)
expected_rewards_sim <- numeric(length(alpha_values))
for (j in seq_along(alpha_values)) {
alpha <- alpha_values[j]
fortunes <- numeric(num_simulations)
for (i in 1:num_simulations) {
fortunes[i] <- simulate_problem(n, initial_fortune, p, alpha)
}
expected_rewards_sim[j] <- mean(fortunes)
}
reward.df_sim <- data.frame(alpha = alpha_values, reward = expected_rewards_sim)
ggplot() +
geom_point(data = reward.df_sim, aes(x = alpha, y = reward), stroke = 2, color = "#D96098", shape = 8) +
geom_line(data = reward.df, aes(x = alpha, y = reward), size = 2, color = "#F7DBF0") +
labs(x = "Alpha", y = "Expected Reward", title = "Gambling model") +
geom_vline(xintercept = 0, linetype = "dashed") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "B")
To try an interactive version of this problem please visit this link here!
Now let’s make the problem more advanced:
This is an R implementation of the gambling model with changing win probabilities from Chapter 1, section 4 (“A Gambling Model With Changing Win Probabilities”) of the “Introduction to Stochastic Dynamic Programming” book by Sheldon Ross.
A gambler that has x fortune, makes n bets wins with probability p At each stage the gambler has p probability of winning, with alpha probability of winning for previous stages taken from a distribution F. The gambler then decides to bet y from 0 to his current fortune (x). Goal is to maximize the expectation of the log of his fortune.
NOTE: utility function taken to be u(x) = log(x) like before, adding 1 to prevent errors.
NOTE: alpha is not the percentage we are betting but rather the probability of winning at the previous stage, don’t confuse this with the alpha in the gambling model in section 2.
Let’s start by loading relevant libraries
#Load relevant libraries
library(ggplot2)
library(truncnorm)
# Define the probability density function, going with the truncated normal dist
#' probability density function for the truncated normal dist
#'
#' @param alpha the winning probability
#' @return the pdf
#' @export
f <- function(alpha) {
return(dtruncnorm(alpha, a = 0, b = 1, mean = 0.5, sd = 0.1))
}
#Great now lets define the utility function
#' Utility function
#'
#' @param x
#' @return log(x+1), adding 1 because if the gambler bets all of his fortune
#' then we will get an error because log(0) is undefined
#' @export
u <- function(x){
return(log(x+1))
}
#' Recursive function that returns the maximum expected reward
#'
#' @param n how many times we are betting
#' @param x the fortune
#' @param p the probability of winning
#'
#' @return the maximal expected reward
#' @export
V <- function(n, x, p) {
# Obviously first we define the base case
if (n == 0) {
return(u(x))
}
#alpha <- rtruncnorm(1, a = 0, b = 1, mean = 0.5, sd = 0.1) # This is how you would have taken alpha randomly from a truncated normal distribution.
hold.integrand_win <- function(alpha) {
V(n - 1, (x + y), alpha) * f(alpha)
}
hold.integrand_lose <- function(alpha) {
V(n - 1, (x - y), alpha) * f(alpha)
}
max_y <- 0 # Initialize max_y
#Find the best y:
for (y in 0:x) {
val <- p * integrate(hold.integrand_win, 0, 1)$value +
(1 - p) * integrate(hold.integrand_lose, 0, 1)$value
# Use max to find the maximum value
max_y <- max(max_y, val)
}
return(max_y) #Return the maximal reward found through searching all y values
}
# Initial fortune
x_initial <- 10
# Initial win probability
p_initial <- 0.5
# Number of plays
n <- 3
c()->return_vect
for(p_initial in seq(0.5, 1, 0.05)){
# Calculate maximal expected final utility
max_expected_utility <- V(n, x_initial, p_initial)
return_vect <- c(return_vect, max_expected_utility)
}
df <- data.frame(p= seq(0.5, 1, 0.05), return=return_vect)
ggplot(df, aes(x=p, y=return_vect))+geom_line(size=2, color = "#538392")+
geom_point(color="#538392", shape=8, stroke=3)+
labs(x="Probability of winning", y="Return")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(tag="A")
hold.vect <- c()
x<-10
p<-0.8
n<-3
for (y in seq(0, x, 1)){
hold.val <- V(n - 1, x + y, p) - V(n - 1, x - y, p)
hold.vect <- c(hold.vect, hold.val)
}
y_increasing <- data.frame(y = seq(0, x, 1), equation = hold.vect)
ggplot(y_increasing, aes(x=y, y=equation))+geom_line(size=2, color = "#538392")+
geom_point(color="#538392", shape=8, stroke=3)+
labs(x="y", y="Return")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(tag="B")
This is an R implementation of the stock option model from Chapter 1, section 3 (“A Stock-Option Model”) of the “Introduction to Stochastic Dynamic Programming” book by Sheldon Ross.
An option for a stock is the ability to buy a stock for a pre-determined price. That means you can pay some money to have the right to buy a specific share of a company for a pre-determined price even if the stock price increases later on.
First let’s start by loading relevant libraries
#Load libraries
library(stats)
library(latex2exp)
library(ggplot2)
# Define the probability density function
f <- function(x) {
dnorm(x, mean = 0, sd = 1)
}
#Define V_n
#' Recursive function returns the maximal expected return
#'
#' @param n number of days
#' @param s current price of the stock
#' @param c past price that we buy the stock at
#' @return maximal expected return value
#' @export
V <- function(n, s, c) {
if (n == 0) {
return(max((s-c), 0))
}
#Remember F(x) is f(x)dx so we can multiply by the pdf and integrate with
#respect to x.
hold.integrand <- function(x) {V(n - 1, s + x, c) * f(x)}
expected_future_value <- integrate(hold.integrand, -Inf, Inf)$value
# Two possible scenarios here
# Case I) immediately sell the option
# Case II) wait and see what happens, hoping for better future value
return(max(s - c, expected_future_value))
}
Let’s test
# Parameters
initial_stock_price <- 10 # Initial stock price
c <- 8 # stock purchase price
n <- 3 # Number of days
result <- V(n, initial_stock_price, c)
s_values <- seq(0, 100, by = 5)
V_n_s <- c()
for(i in 1:length(s_values)) {
s <- s_values[i]
V_n_s_value <- V(n, s, c)
V_n_s <- c(V_n_s , V_n_s_value)
}
hold.df <- data.frame(s = s_values, V_n_s = (V_n_s_value - s_values))
ggplot(hold.df, aes(s, V_n_s))+
geom_line(size=2, color = "#538392")+
labs(x="s", y=TeX("$V_n(s) - s$"))+
ggtitle(TeX("$V_n(s) - s$ is decreasing in s"))+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(tag="A")
We just showed Lemma 3.1 using R!
The following R code is related to the special case of the problem - “Optimal Allocations In the Construction of k-out-of-n Reliability Systems” - by Derman et al, 1974.
References:
Derman, C., Lieberman, G. J., & Ross, S. M. (1974). Optimal allocations in the construction of k-out-of-n reliability systems. Management Science, 21(3), 241-250.
I am going to start this section by considering the most basic problem which is the situation where we are considering money that gets allocated for an n component system that is getting built. Assume the case where we need to have at least one successful component for the system to work.
Case k=1
Let’s think about this, if we are allocating money simultaneously then we are essentially trying to maximize the probability of one component being successful. That would be \(1-(1-p(x_1))(1-p(x_2))...(1-p(x_n))\) (\(1 - \prod_1^n(1-p(x_i))\)) with \(x_i\) being the money allocated at the ith stage and \(i\) from 1 to n (total number of stages).
The probability of success of the component depends on how much we invest. If we invest 0 dollars, the probability of success is 0 meaning p(0)=0.
Now consider the case where we are unaware of whether a component will be successful or not until it is built. In that case we start with the first component and the probability of success of the component would be p(x_1). If it is unsuccessful then we move on to the next component so its probability would be conditioned on the first component being unsuccessful which is \(p(x_1)(1-p(x_2))\) and so on (\(p(x_1) + \sum_{i=2}^n \prod_{j=1}^{i-1} (1 - p(x_j)) p(x_i)\)).
Below we are going to answer the question of whether investing all of our money into the first task is optimal or dividing our money into all tasks equally and invest in each of them. We take two cases where p(x) is concave and convex and then proceed by increasing the number of components we would need to be successful (k).
library(Deriv)
library(ggplot2)
# Define the function for probability of success
P <- function(x) {
alpha <- 0.1
return((exp(alpha * x) - 1) / (exp(alpha * x) + 1))
}
#If you take the log of the equation we talked about above for the
#non-sequential case
alpha <- 0.1
log_expr <- expression(log(1 - (1 - (exp(alpha * x) - 1) / (exp(alpha * x) + 1))))
# and find the first derivative of log(1 - P(x)) with respect to x
first_derivative <- Deriv(log_expr, "x")
# and then the second derivative of log(1 - P(x)) with respect to x
# to check if it is concave or convex - remember that a function is
# concave if its second derivative is negative and convex if positive
second_derivative <- Deriv(first_derivative, "x")
x <- seq(0, 100, 1) #assume the amount we are investing is between 0 and 100
evaluated_second_deriv <- eval(second_derivative)
#Which we see here that the function is actually concave
ggplot(data.frame(x=x, y=P(x)), aes(x=x, y=y))+geom_point(stroke=2,shape=8, color = "#D96098")+
geom_line(size=2, color = "#D96098")+
labs(x="x", y="p(x)")+
ggtitle("p(x) is non-decreasing in x")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(tag="A")->plt.A
plt.A
ggplot(data.frame(x=x, y=evaluated_second_deriv), aes(x=x, y=y))+
geom_point(stroke=2,shape=8, color = "#D96098")+
geom_line(size=2, color = "#D96098")+
geom_hline(yintercept=0, linetype="dashed", size=2)+
labs(x="x", y="p\"(x)")+
ggtitle("p(x) is a concave function")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(tag="B")->plt.B
plt.B
Again, simulating the success of the outcome is like flipping a coin that has a p(x) probability of getting heads, if p(x) is higher then the probability of getting heads is higher.
#Let's say I am taking the strategy of allocating A/N for each component,
#essentially I am dividing my total money (A) over all N component
#' Simulate problem under strategy 1
#'
#' @param A all of the money we are investing
#' @param n the number of stages
#'
#' @return whether that one task is completed successfully or not
#' @export
simulate_problem_strategy_1 <- function(A, n){
count.success <- 0
for(i in 1:n){
if(rbinom(1, 1, P(A/n)) == 1){
count.success <- count.success + 1
}
}
if(count.success > 1){
return(1)
}else{
return(0)
}
}
#Second strategy is to put all of money down for the first task
#' Simulate problem under strategy 2
#'
#' @param A all of the money we are investing
#' @param n the number of stages
#'
#' @return whether that one task is completed successfully or not
#' @export
simulate_problem_strategy_2 <- function(A, n){
if(rbinom(1, 1, P(A)) == 0){
return(0)
}else{
return(1)
}
}
A <- 10
n <- 100
num_simulations <- 1000
numeric(num_simulations) -> strategy_1.rewards
numeric(num_simulations) -> strategy_2.rewards
for(k in 1:num_simulations){
simulate_problem_strategy_1(A, n) -> strategy_1.rewards[k]
simulate_problem_strategy_2(A, n) -> strategy_2.rewards[k]
}
ggplot(data.frame(name=c("Equally distributed", "All in 1"),
success_rate = c(sum(strategy_1.rewards)/num_simulations, sum(strategy_2.rewards)/num_simulations)),
aes(x = name, y = success_rate))+
geom_bar(stat = "identity", width=0.2, fill= "#D96098")+
labs(x="Strategy", y="Success Rate")+
ggtitle("Optimal solution")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))
Let’s try a case where p(x) is a convex function
# Define the function for probability of success
P <- function(x) {
return(0.01 * sqrt(x))
}
log_expr <- expression(log(1 - 0.01 * sqrt(x)))
# and find the first derivative of log(1 - P(x)) with respect to x
first_derivative <- Deriv(log_expr, "x")
# and then the second derivative of log(1 - P(x)) with respect to x
# to check if it is concave or convex
second_derivative <- Deriv(first_derivative, "x")
x <- seq(0, 100, 1)
evaluated_second_deriv <- eval(second_derivative)
#Which we see here that the function is actually concave
ggplot(data.frame(x=x, y=P(x)), aes(x=x, y=y))+geom_point(stroke=2,shape=8, color = "#D96098")+
geom_line(size=2, color = "#D96098")+
labs(x="x", y="p(x)")+
ggtitle("p(x) is non-decreasing in x")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(tag="A")->plt.A
plt.A
ggplot(data.frame(x=x, y=evaluated_second_deriv), aes(x=x, y=y))+
geom_point(stroke=2,shape=8, color = "#D96098")+
geom_line(size=2, color = "#D96098")+
geom_hline(yintercept=0, linetype="dashed", size=2)+
labs(x="x", y="p\"(x)")+
ggtitle("p(x) is a convex function")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(tag="B")->plt.B
plt.B
A <- 100
n <- 100
num_simulations <- 1000
numeric(num_simulations) -> strategy_1.rewards
numeric(num_simulations) -> strategy_2.rewards
for(k in 1:num_simulations){
simulate_problem_strategy_1(A, n) -> strategy_1.rewards[k]
simulate_problem_strategy_2(A, n) -> strategy_2.rewards[k]
}
ggplot(data.frame(name=c("Equally distributed", "All in 1"),
success_rate = c(sum(strategy_1.rewards)/num_simulations, sum(strategy_2.rewards)/num_simulations)),
aes(x = name, y = success_rate))+
geom_bar(stat = "identity", width=0.2, fill= "#D96098")+
labs(x="Strategy", y="Success Rate")+
ggtitle("Optimal solution")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))
Now let’s increase the number of successful components needed for the system to work to k…
#' Simulate problem under strategy 1
#'
#' @param A all of the money we are investing
#' @param n stages (total components)
#' @param k components we would like to have successfully built
#'
#' @return whether k successful components are built or not
#' @export
simulate_problem_strategy_1 <- function(A, n, k){
count.success <- 0
for(i in 1:n){
if(rbinom(1, 1, P(A/n)) == 1){
count.success <- count.success + 1
}
}
if(count.success >= k){
return(1)
}else{
return(0)
}
}
#Second strategy is to put all of money down for the first k tasks
#' Simulate problem under strategy 2
#'
#' @param A all of the money we are investing
#' @param n stages (total components)
#' @param k components we would like to have successfully built
#'
#' @return whether k successful components are built or not
#' @export
simulate_problem_strategy_2 <- function(A, n, k){
count.success <- 0
for(i in 1:k){
if(rbinom(1, 1, P(A/k)) == 1){
count.success <- count.success + 1
}
}
if(count.success >= k){
return(1)
}else{
return(0)
}
}
A <- 10000
n <- 50
num_simulations <- 1000
numeric(num_simulations) -> strategy_1.rewards
numeric(num_simulations) -> strategy_2.rewards
storing.strategy.1.sums <- numeric(10)
storing.strategy.2.sums <- numeric(10)
k.index <- 1
for(k in seq(1, n, 2)){
for(p in 1:num_simulations){
simulate_problem_strategy_1(A, n, k) -> strategy_1.rewards[p]
simulate_problem_strategy_2(A, n, k) -> strategy_2.rewards[p]
}
sum(strategy_1.rewards)->storing.strategy.1.sums[k.index]
sum(strategy_2.rewards)->storing.strategy.2.sums[k.index]
k.index <- k.index + 1
}
df.k <- data.frame(k = seq(1, 50, 2), strategy_1 = storing.strategy.1.sums/num_simulations, strategy_2 = storing.strategy.2.sums/num_simulations)
ggplot() +
geom_line(data = df.k, aes(x = k, y = strategy_1, colour = "Equally distributed"), size = 2) +
geom_point(data = df.k, aes(x = k, y = strategy_1, colour = "Equally distributed"), shape = 8, stroke = 2) +
geom_line(data = df.k, aes(x = k, y = strategy_2, colour = "All in k"), size = 2) +
geom_point(data = df.k, aes(x = k, y = strategy_2, colour = "All in k"), shape = 8, stroke = 2) +
labs(x = "k (n=50)", y = "Success Rate", colour = "Strategy") +
ggtitle("Optimal solution") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
scale_color_manual(values = c("Equally distributed" = "#D96098", "All in k" = "#BEAEE2"))
Interesting! We see that the first strategy (plotted in pink) is optimal even if we increase the number of successful components k when p(x) is convex.
Problem is taken from Derman et al, 1974.
Let’s consider the special case when P(x)=x and we want to maximize the probability of getting two components successfully built. Below, we simulate the case and compare to the theory mentioned in the paper.
The optimal policy \(\pi\) is said to allocate \(y/r\) - with \(y\) being the amount of our present money and \(r\) the number of stages left - when we have not yet built a successful component yet and allocate \(y\) - our entire money - once we have one component successfully built.
#' Probability of successfuly constructing a component with
#' investment x
#'
#' @param x the amount of investment
#'
#' @return either the probability of succeeding or 1 in case we invest more money than needed
#' @export
P <- function(x){
return(min(x, 1))
}
#Let's simulate the problem
#' Simulate the special case problem
#'
#' @param A the total amount of money we can invest
#' @param n the number of stages
#'
#' @return whether two components will be successfully built or not
simulate_problem <- function(A, n){
successful.components <- 0 #Track successful components
y <- A #How much money we are starting with
for(stage in 1:n){
if(successful.components==0){ #we allocate y/r if nothing has been built yet
x <- y/(n-stage+1)
}
if(successful.components==1){ #allocate all of our money once we have one component built
x<-y
}
if(y==0){ #if we don't have any money then we can't invest
x<-0
}
#at each stage simulate whether the component will be successfully built or not
p <- P(x)
if(rbinom(1, 1, p)==1){
successful.components <- successful.components +1
}
if(successful.components==2){
return(TRUE)
}
y <- y - x #of course we lose the money we invest at each stage
}
return(FALSE)
}
# #Equation is taken from Derman et al, 1974
#
# #' Theory equation of the probability of getting 2 components successfully built under that policy
# #'
# #' @param n the number of stages
# #' @param y the total amount of money we invest
# #'
# #' @return the probability of successfully building two components
# #' @export
# U <- function(n, y){
#
# return((1-y/n)^n + y -1)
#
# }
We are comparing simulations and theory for different investment amounts. Obviously we expect to see the probability increase for higher investment amounts.
#Do a 1000 simulations
sim.vec.A <- c()
for(A in seq(0, 1, 0.01)){
sim.vec <- c()
for(k in 1:1000){
sim.vec <- c(sim.vec, simulate_problem(A, 10))
}
sim.vec.A<-c(sim.vec.A,sum(sim.vec)/1000)
}
# #Plug into the theoretical equation
# theory.vec <- c()
# for(y in seq(0, 1, 0.01)){
#
# theory.vec <- c(theory.vec, U(10, y))
#
# }
I would suggest you pause at this point and try to write the optimality equation before seeing the solution below.
#' Optimality equation function
#'
#' @param y the amount of money we are investing
#' @param n the number of stages
#'
#' @return probability of building two successful components
#' @export
V <- function(y, n){
#As always we write the boundary conditions first
#We know that we have not defined the boundary conditions correctly if we get a C stack limit error.
if(y==0){
return(0)
}
if(n==0){
return(0)
}
#Check which x will maximize the probability of getting two components successfully built.
max.x <- 0
for(x in seq(0, y, 0.1)){
p <- P(x)
#Either we successfully build a component and invest our entire money in the next task and the process basically stops or we fail and we continue the same process with one less stage and less money (the money that we invested is now lost).
hold.reward <- max(p*(y-x) + (1-p)*V((y-x), n-1))
max.x <- max(max.x, hold.reward)
}
return(max.x)
}
#Plot theory vs simulations
theory.vec <- c()
for(y in seq(0, 1, 0.01)){
theory.vec <- c(theory.vec, V(y, 10))
}
res.df <- data.frame(y=seq(0, 1, 0.01), simulations = sim.vec.A, theory = theory.vec)
ggplot(res.df)+geom_point(aes(x=y, y=simulations), stroke=2
, color = "#D96098", shape=8)+
geom_line(aes(x=y, y=theory), size = 1,color = "#F7DBF0")+
labs(x = "Investment money (y)", y = "Prob of building \n2 successful components", title = "special case p(x)=x") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "A")
Coming soon…
Part II of these series for more advanced problems on optimal allocation.
We are going to start by considering a simple case, let’s say we have \(N\) periods to invest and at each period we invest a certain amount of money, \(y\), (we invest with certainty at each period, our total money is \(A\)). The return that we get is \(R(y)\). The return is not going to be available for investment in the future periods and we get it at the end of the \(N\) periods. How much do we invest at each stage to maximize our total return.
Okay, first let’s define the \(R(x)\) function. This is quite similar to \(p(x)\) in the problems we covered before. I am going to use a simple convex function, \(R(x) = \sqrt{x}\).
#' Return function
#'
#' @param x investment amount
#'
#' @return the return from investment
#' @export
R <- function(x){
return(sqrt(x))
}
Let’s think of how we can write the optimality equation. The base case is when we are at the last period, we invest and we get R(x) back. Overall, we try to maximize the expected return we get from the current period and future periods. So technically we are thinking \(V_n(A) = \max_{0 \le y \le A}[R(y) + V_{n-1}(A-y)]\). Let’s write a function for theory:
#' optimality equation function
#'
#' @param n number of stages
#' @param A total amount of money we can invest
#'
#' @return the expected return
#' @export
V <- function(n, A){
if(n==1 || A==0){ #define boundary conditions
return(R(A))
}
#Find the y that maximizes the function
hold.max <- 0
for(y in seq(0, A, 0.2)){
expected_reward <- max(R(y)+V(n-1, A-y))
max(expected_reward, hold.max)->hold.max
}
return(hold.max)
}
Let’s think about the optimality equation a little bit more. If I am at the boundary, I invest \(y\) amount of money and get \(R(y)\) as a return (and that is because we only have one action, we invest with certainty). Now if I had two periods to go then I know \(V_2(A) = \max_{0 \le y \le A}[R(y) + V_{1}(A-y)]\). \(V_{1}(A-y)\) is essentially \(R(A-y)\). So basically the question is how do I pick y such that \(R(y)+R(A-y)\) is maximized which would be \(\sqrt(y)+\sqrt(A-y)\). To find out where the maximum happens we can see where the derivative is 0. \(\frac{1}{2} y^{-1/2} + \frac{1}{2} (A-y)^{-1/2} = 0\). If we cancel out the \(\frac{1}{2}\) then we get \(A-y=y\) which gives us \(y=A/2\).
Let’s move to the next stage. we are trying to find the \(y\) that maximizes the optimality equation.
\(V_3(A) = R(y)+V_2(A-y)\). We know that \(V_2(A-y) = \sqrt{2}\sqrt{A-y}\). To find the \(y\) that maximizes \(V_3(A)\), we set its derivative to zero and we end up with: \(\sqrt{2(A-y)} = \sqrt{2y}\). Then we get \(y=A/3\). By induction we see that the optimal strategy \(\pi\) is to equally distribute our money over stages (\(y=A/N\)).
So essentially, the total return would be \(\sqrt{n*A}\) with \(n\) being the number of stages and A the total amount of money we are investing.
theory_vec <- c()
formula_vec <- c()
stages <- 5
for(A in seq(0, 10, 1)){
theory_vec <- c(theory_vec, V(stages, A))
formula_vec <- c(formula_vec, stages*sqrt(A/stages))
}
res.df <- data.frame(y = seq(0, 10, 1), theory = theory_vec, formula = formula_vec)
ggplot(res.df)+geom_point(aes(y, theory), stroke=2
, color = "#D96098", shape=8)+
geom_line(aes(y, formula), size = 1,color = "#F7DBF0")+
labs(x = "Investment money (y)", y = "Return", title = "Deterministic case") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "A")
Let’s add some uncertainty to the situation. What if at each stage, there is an opportunity to invest that comes up. If the opportunity to invest does not come up then we cannot invest. That probability is \(p\). Let’s revise the optimality equation.
We can consider the occurrence of the opportunity to invest with an indicator function (\(I\)). The indicator function returns 1 when the opportunity to invest comes up else it returns 0.
\(V_n(A, 1) = R(y) + p V_{n-1}(A-y, 1) + (1-p) V_{n-1}(A-y, 0)\),
\(V_n(A, 0) = V_{n-1}(A, 0)(1-p) + V_{n-1}(A, 1)p\)
#' Optimality equation for the sequential allocation model with uncertainty
#'
#' @param n number of stages
#' @param A total amount of money for investment
#' @param I the indicator function, input can be either 1 or 0
#' @param p the probability of the investment opportunity arising
#'
#' @return the expected reward
#' @export
V <- function(n, A, I, p){
#Define boundary conditions
if((n==1 || A==0)&&I==0){
return(0)
}
if((n==1 || A==0)&&I==1){
return(R(A))
}
#Search for the y that maximizes the bellman equation
hold.max <- 0
for(y in seq(0, A, 0.2)){
if(I == 0){
expected_reward <- V(n-1, A, 0, p)*(1-p) + p*V(n-1, A, 1, p)
}
else{
expected_reward <- R(y) + p*V(n-1, A-y, 1, p) + (1-p)*V(n-1, A-y, 0, p)
}
max(expected_reward, hold.max)->hold.max
}
return(hold.max)
}
Coming soon, more on sequential allocation problems…
Imagine you are presented with n offers in sequential order. You don’t know the value of the offer \(i\) until you are at stage \(i\) and the offer is presented to you. Therefore, your only option to realize how good or bad the offer is, is to compare it with the previous offers. You have the option to either reject the offer or accept the offer. If you accept it the game will stop and you have either selected the best offer and win or not selected the offer and lose. If you reject it then you move on to the next offer. This is a famous problem known as the “Optimal stopping problem” or the “Secretary problem”. There is a great video that explains the mathematics behind this problem by Numberphile. It explains how we can select the best toilet at a music festival
The overall strategy of such problems is to keep rejecting the offers up to stage \(k\) and then start comparing the offers to anything that came before stage \(k+1\). If any offer is superior then we accept it, otherwise we keep rejecting and moving forward.
With that let’s simulate the situation.
#' function for simulating the optimal stopping problem
#'
#' @param offers the sequence of offers presented
#' @param i the stage of the offer
#' @param n the total number of stages
#'
#' @return the offer selected
#' @export
select_offer <- function(offers, i, n){
for(j in (i+1):n){
if(offers[j]>max(offers[1:i])){
return(j)
}
}
return(n)
}
Simulate the situation
n <- 1000 #The number of offers
hold.diff.i <- c()
for(i in seq(100, 500, 10)){ #This is the point where we stop rejecting
hold.res <- c()
for(k in 1:5000){ #Repeat 5k times for each i
offers <- rnorm(1000) #Generate random offer scores
hold.vec <- c()
selected.offer <- c()
select_offer(offers, i, n) -> hold.offer.idx
hold.vec <- c(hold.vec, hold.offer.idx)
selected.offer <- c(selected.offer, offers[hold.offer.idx])
#Find out if our strategy was successful in finding the best offer
#or not
hold.res<-c(hold.res, (selected.offer==max(offers)))
}
#Find the success rate
hold.diff.i <- c(hold.diff.i, sum(hold.res)/5000)
}
Find the success rates for different stage number (\(i\)) where you stop rejecting the offer and start comparing.
results.df <- data.frame(i = seq(100, 500, 10), success_rate = hold.diff.i)
ggplot(results.df)+geom_point(aes(x=i, y=success_rate), stroke=2
, color = "#D96098", shape = 8)+
labs(x = "i", y = "Success rate", title = "Best offer") +
geom_vline(xintercept = floor(n * 1 / exp(1)), linetype="dashed")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "A")
Interesting, you see that even in simulations it is clear that the success rate is highest when we are stage \(n/e\).
Reference: The theoretical solution to this problem is taken from Smith, 1975.
Consider the optimal stopping problem with the possibility of rejection. Imagine that we are trying to hire a secretary. There are a total of \(N\) secretaries and at the end of each interview, we can decide to either reject or accept the job applicant for the position. Either we select the applicant and the process is over or we reject the applicant and move on to the next one. We can’t go back to an applicant we had rejected in the past. In this particular problem, we are considering the case where the secretary has a choice of either accepting or rejecting the opportunity if given to him/her. The probability of the secretary accepting the offer is \(p\). Question is when should be stop rejecting the secretaries and start accepting to increase our chances of finding the best secretary?
Let’s start by simulating the problem
#' Simulate the secretary problem with the possibility of rejection.
#'
#' @param offers the set of offers, in this case we can consider the scores of the secretaries with 100 being the best and 1 the worst secretary.
#' @param i denotes at what stage we stop rejecting and start comparing to the previous candidates
#' @param p the probability of the secretary accepting the offer
#'
#' @return the applicant that accepts the offer
#' @export
simulate_problem <- function(offers, i, p){
for(j in (i+1):length(offers)){
if(offers[j] > max(offers[1:i])){
prob <- rbinom(1, 1, p)
if(prob==1){
return(j)
}
}
}
return(length(offers))
}
#Simulate the problem 0.5k times
num_simulations <- 500
matrix(NA, nrow = length(seq(2, 1000, 10)), ncol = 3)->store.mat
count <- 1
for(p in c(0.5, 0.7, 0.9)){
success.rate.vec <- c()
for(N in seq(2, 1000, 10)){ #try different stages at which we stop rejecting
success.rate <- 0
for(simulations in 1:num_simulations){
offers <- sample(seq(1, 1000, 1))
store.res <- simulate_problem(offers, N, p)
if(offers[store.res]==1000){
success.rate <- success.rate + 1
}
}
#find the success rate
success.rate/num_simulations -> success.rate
success.rate.vec <- c(success.rate.vec, success.rate)
}
store.mat[,count] <- success.rate.vec
count <- count + 1
}
Plot the simulation results
as.data.frame(store.mat)->store.mat
colnames(store.mat)<-c("P = 0.5", "P = 0.7", "P = 0.9")
store.mat$r <- seq(2, 1000, 10)
#' The stage at which we stop rejecting
#'
#' @param p the probability of the secretary accepting the job
#'
#' @return the stage which we stop rejecting
#' @export
r <- function(p){
return(p^(1/(1-p)))
}
ggplot()+
geom_point(data = store.mat, aes(x=r, y=`P = 0.5`, colour = "p = 0.5"), shape = 8, stroke = 2)+
geom_point(data = store.mat, aes(x=r, y=`P = 0.7`, colour = "p = 0.7"), shape = 8, stroke = 2)+
geom_point(data = store.mat, aes(x=r, y=`P = 0.9`, colour = "p = 0.9"), shape = 8, stroke = 2)+
geom_point(aes(x = r(0.5)*1000, y = r(0.5)), color = "#EE99C2", shape = 8, stroke = 5)+
geom_point(aes(x = r(0.7)*1000, y = r(0.7)), color = "#D96098", shape = 8, stroke = 5)+
geom_point(aes(x = r(0.9)*1000, y = r(0.9)), color = "#86469C", shape = 8, stroke = 5)+
geom_line(aes(x = r(seq(0, 0.99, 0.01))*1000, y = r(seq(0, 0.99, 0.01)), colour = "Optimal solution"))+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+
labs(x = "r", y = "Success rate", colour = "")+
scale_color_manual(values = c("p = 0.5" = "#FFCDEA", "p = 0.7" = "#FB9AD1", "p = 0.9" = "#BC7FCD", "Optimal solution" = "#00224D"))
The special example below is taken from the “Algorithms to live by” book by Brian Christian and Tom Griffiths.
Let’s consider another variation. Let’s assume that the secretary may be recalled at a later time but the chance of the secretary accepting the offer would then be 50% at a later time (the probability of the acceptance of a belated offer is lower than an immediate offer). We start simulating this basic case and then move on to more advanced variations of the secretary problem.
According to the book, in this case of the optimal stopping problem, the optimal strategy \(\pi\) would be to stop rejecting after we interview 61% of the offers and start comparing offers afterwards.
#' Simulate the problem
#'
#' @param offers the score of offers
#' @param i the stage at which we stop rejecting
#' @param p the probability of the acceptance of a belated offer
#'
#' @return the final accepted offer
#' @export
simulate_problem <- function(offers, i, p){
for(j in (i+1):length(offers)){
if(offers[j] > max(offers[1:i])){
return(offers[j])
}
}
heads <- rbinom(1, 1, p) #try to see if a belated offer is accepted or not
if(heads==1){
return(max(offers[1:i]))
}else{ #if not return the last offer
return(offers[length(offers)])
}
}
#simulate the situation 5k times
num_simulations <- 5000
hold.offer.sum <- c()
for(i in 30:99){ #try different stages of when we stop rejecting
hold.accepted.offer <- c()
for(k in 1:num_simulations){
offers <- sample(seq(1, 100, 1))
hold.accepted.offer <- c(hold.accepted.offer, (simulate_problem(offers, i, 0.5) == 100))
}
hold.offer.sum <- c(hold.offer.sum, sum(hold.accepted.offer))
}
res.df <- data.frame(i = 30:99, success_rate = hold.offer.sum/num_simulations)
ggplot(res.df, aes(x = i, y = success_rate))+geom_point(color = "#D96098", shape = 8, stroke = 2)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+
geom_vline(xintercept = 61, linetype="dashed")+
labs(x = "r", y = "Success rate")
Megan is looking for a man in finance with trust fund and blue eyes who is also 195 cm tall. Suppose there are a sequence of men in finance who have trust funds and blue eyes and Megan is trying to find the tallest among them (their heights range from 145cm to 195cm). Megan starts by dating the first man, she needs to make a decision of either rejecting the man and moving to the next candidate or settling down. If she settles down, then the problem stops. However, if she decides to reject the man, she will move on to the next man in the sequence of men.
If Megan rejects a candidate, she cannot return to him later; her decision is final. Question is how can Megan maximize her chances of finding the tallest man among the candidates?
To maximize her chances of ending up with the tallest man, Megan employs a strategy similar to the “full information” version of the secretary problem. In this version, Megan knows the height of each man (relative to all men) after dating them, which allows her to make an informed decision at each step. Her goal is to choose the tallest man possible among the candidates. The challenge, however, lies in deciding when to stop searching and settling down. If she settles too early, she might miss out on a potentially taller man who appears later in the sequence. If she waits too long, she risks ending up with a shorter candidate towards the end.
To optimize her chances, Megan uses a threshold-based strategy. At each stage depending on the number of candidates she has left to date, she considers the probability of finding a taller man if she rejects the current candidate. To understand the problem better, let’s consider the problem backwards. If Megan has rejected all candidates and is down to the last candidate then she has no other choice than to select him as her partner. If she were at the candidate who is one candidate to the last one, she can check to see if his height exceeds the 50th percentile of the heights of men in the general population. If the candidate’s height is less than the 50th percentile then it would be better to reject the man and move on to the last candidate because there is a 50% chance that the last candidate’s height falls above the 50th percentile. If Megan was at the two to the last candidate stage then she would want to only reject the candidate if the candidate’s height falls below a threshold for which moving to the next candidate has a greater than 50% chance of finding a better candidate. Mathematically that means \(1-(0.5)^{1/2}=0.5\) which gives her a threshold of around ~70%. Likewise she can find the particular threshold of rejection for each stage k to be \(0.5^{1/k}\).
Below we find thresholds of rejection
#' Finding the threshold for rejection
#'
#' @param num.applicants number of total applicants
#'
#' @return the threshold of rejection for each stage
#' @export
find.threshold <- function(num.applicants){
c() -> hold.thresholds
for(k in 1:num.applicants){
hold.thresholds <- c(hold.thresholds , 0.5^(1/k))
}
return(hold.thresholds)
}
thresholds <- find.threshold(10)
df.threshold <- data.frame(applicant = seq(0, 10, 1), threshold = c(0, thresholds))
ggplot(df.threshold, aes(x = applicant, y = threshold))+geom_line(size=2, color = "#538392")+
labs(x="Number of applicants", y="Threshold")+
ggtitle("Threshold of Rejection")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(tag="A")
Simulate the problem
#' Simulate the Megan problem
#'
#' @param candidate.heights height of candidates
#' @param reference.heights percentile of candidates from general population,
#' should be a dataframe and have two columns named Percentile and Number. Percentile
#' representing the specific percentile and Number representing the height value
#'
#' @return the position of the candidate Megan chooses
#' @export
simulate.megan.problem <- function(candidate.heights, reference.heights){
num.applicants <- length(candidate.heights)
find.threshold(num.applicants) -> thresholds
candidate.position <- num.applicants - 1
for(candidate.height in candidate.heights[-num.applicants]){
if(candidate.height > (reference.heights[which.min(abs(reference.heights$Percentile - thresholds[num.applicants]*100)),])$Number){
return(which(candidate.heights==candidate.height)[1])
}
candidate.position <- candidate.position - 1
}
return(num.applicants)
}
#Let's find the specific heights for different percentiles
mean.value <- 175 #Let's consider the mean height to be 175cm
sd.value <- 10 #with a standard deviation of 10cm
#Generate 5k random numbers
random.numbers <- rnorm(5000, mean = mean.value, sd = sd.value)
#Filter for heights not exceeding 195cm and not less than 140cm
filtered.numbers <- random.numbers[random.numbers >= 140 & random.numbers <= 195]
final.sample <- sample(filtered.numbers, 1000, replace = TRUE)
ecdf.function <- ecdf(final.sample)
percentiles <- ecdf.function(final.sample) * 100
reference.heights <- data.frame(Number = final.sample, Percentile = percentiles)
#Let's now simulate the problem
c()->changing.applicant.sims
#Let's see what happens if we change the number of applicants in the pool.
for(num.applicants in c(10, 20, 30, 50, 100)){
num.simulations <- 5000 #Let's simulate the problem 5k times
c() -> candidate.heights.sim
for(i in 1:num.simulations){
success <- 0
candidate.heights <- sample(140:195, num.applicants, replace = TRUE)
if(max(candidate.heights) == candidate.heights[simulate.megan.problem(candidate.heights, reference.heights)]) {success <- 1}
candidate.heights.sim <- c(candidate.heights.sim, success)
}
changing.applicant.sims <- c(changing.applicant.sims, sum(candidate.heights.sim)/num.simulations)
}
res.df <- data.frame(num_applicants = c(10, 20, 30, 50, 100), probability_success = changing.applicant.sims)
ggplot(res.df, aes(x = num_applicants, y=probability_success))+geom_point(stroke = 2, color = "#D96098", shape = 8)+
geom_line(size = 2, color = "#F7DBF0")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+labs(x = "Number of applicants", y="Probability of success", tag="B")+
ggtitle("Full Information Secretary Problem")+ylim(c(0, 1))
We see that Megan has a pretty good chance of finding her ideal man in finance using this strategy even if the number of available men to date is low (with ~60% of getting the best man among 10 men).
Note: The story behind the problem is inspired by the song written by Megan Boni (Lamour, 2024).
Note: The solution to the full-information secretary problem is taken from Christian & Griffiths’ “Algorithm to live by” book (2016).
References:
Christian, B., & Griffiths, T. (2016). Algorithms to live by: The computer science of human decisions. Macmillan.
Lamour, J. (2024, May 29). How a viral song about “looking for a man in finance” took over the internet. TODAY.com. https://www.today.com/popculture/music/tiktok-im-looking-for-a-man-in-finance-megan-boni-interview-rcna151152
The following problem is taken from Guttman, 1959.
A game involves taking random numbers from a uniform distribution. Here we take the distribution from 0 to 1. We have N opportunities to draw numbers from the distribution. At each stage we either reject the opportunity and move to the next stage or accept the opportunity and the process stops. The question is when do we stop and accept the drawn number. The optimal strategy \(\pi\) is to stop when the number we get is higher than a specific threshold. The threshold is 0.5 when we have only one opportunity since the expected value of the drawn number is 0.5. When we have two chances then for the first draw either the drawn number is less than 0.5 which happens with probability 0.5 and we move on to the next case with an expected threshold of 0.5 OR the drawn number is more than 0.5 with probability 0.5 but the expected value would then be 0.75 (because the number is from 0.5 to 1). Therefore \(E_2 = 0.5(0.75)+0.5(0.5)\). We can follow the same logic to find \(E_3\). Either the drawn number is less than \(E_2\) which is 0.625 and thus we move to the second opportunity which has an expected value of 0.625 or the drawn number exceeds 0.625 with a probability of (1-0.625) and an expected value of 0.8125 (\(E_3 = 0.375(0.8125)+0.625(0.625)\)).
#' Probability density function
#'
#' @param x parameter x
#'
#' @return the probability density function for a uniform distirbution from 0
#' to 1.
#' @export
f <- function(x){
dunif(x, min = 0, max = 1)
}
#' Threshold function, we stop if the drawn number is greater or equal to E_n
#' at stage n
#'
#' @param n the number of opportunities left to draw
#'
#' @return the threshold for stopping at that stage
E <- function(n){
#Define the boundary condition
if(n==0){
return(0)
}
hold.integrand.first.part <- function(x) {x * f(x)}
first.part <- integrate(hold.integrand.first.part, E(n-1), Inf)$value
hold.integrand.second.part <- function(x) {f(x)}
second.part <- integrate(hold.integrand.second.part, - Inf, E(n-1))$value
return(first.part + E(n-1) *second.part)
}
E_vector <- c()
for(i in 1:12){
E_vector <- c(E_vector, E(i))
}
E_res <- data.frame(left_opportunity = 1:12, E = E_vector)
ggplot(E_res) +
geom_point(aes(x=left_opportunity, y = E),stroke=2, color = "#538392", shape=8)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(tag="A", x="Left opportunity")
Results are consistent with Table 1 from Guttman, 1959.
A deck of 52 cards, before we turn the cards we can guess whether it is the ace of spades or not.
Let’s start with the base case. The base case is when you only have one ace of spades card so the probability of that one card being the ace of spades is 1. Alternatively, you have one non-spade card so the probability of having an ace of spades is 0. When you move to the next case, there are two possible scenarios,
I guess intuitively it doesn’t make much of a sense to think about this problem this way because unlike the other optimal stopping problems, there are no comparisons and all other cards except for the ace of spades are essentially the same. Consequently, the probability of guessing correctly would always be \(1/N\). But let’s see what happens if we try to write it as a dynamic programming problem.
#' Probability of guessing the ace of spades correctly depending on the number
#' of cards
#'
#' @param n number of total cards
#' @param chances number of cards left to guess
#'
#' @return the probability of guessing ace of spades correctly
#' @export
V <- function(n, chances){
if(n==1){
return(1) #once you get to this point, the probability of
#the card being the ace of spades is 1.
}
if(chances>1){
return(max(1/n*V(n-1, (chances-1)), (n-1)/n*V(n-1, chances)))
}else{
return(max(1/n, (n-1)/n*V(n-1, 1)))
}
}
#We can try plotting the probability
V_vector <- c()
for(chances in 1:5){
c(V_vector, V(52, chances))->V_vector
}
#And the probability of guessing the ace of spades correctly remains constant.
And we see that this confirms out initial thoughts. I am guessing the point of the question was for us to be able to write the Bellman equation correctly which we have done.
\(V_N = \max[1/N, \frac{N-1}{N} V_{N-1}]\)
Part D, if we were to pick any of the thirteen spades then the problem would become something like this:
#' The probability of guessing the spades correctly
#'
#' @param no_spades number of spades
#' @param no_others number of other cards
#'
#' @return the probability of getting spades
V <- function(no_spades, no_others){
#Define boundary conditions first
if(no_spades == 0){
return(0) #you ran out of spades and guessed none
}
if(no_others == 0){
return(1) #you ran out of all other cards, you have 1 spade left
}
#Let's think about what happens
#We guess we have a spade card that comes with the probability of no_spaces/all cards
#We pass, either the passed card is a spades card or it is a non-spade card
return(max(no_spades/(no_spades+no_others), no_spades/(no_spades+no_others)*V(no_spades-1, no_others)+
no_others/(no_spades+no_others)*V(no_spades, no_others - 1)))
}
Problem #11 from Sheldon Ross’ book on stochastic dynamic programming
An urn initially contains \(n\) white and \(m\) black balls. Balls are selected at random, without replacement, and the decision maker wins one unit when a white ball is selected and loses one unit when a black ball is selected. At any point in time, the decision maker is allowed to quit playing. The objective is to maximize expected winnings.
Let’s start by simulating the problem. If we were to take balls at random from an urn, we would intuitively stop taking more balls once the number of white and black balls are the same or there are more black balls in the urn because there will be a 50% or more chance of losing.
#' Simulate the urn problem
#'
#' @param m the number of black balls
#' @param n the number of white balls
#'
#' @return the score
#' @export
simulate_problem <- function(m, n){
#We take balls at random, similar to assigning black and white to a sequence at random
results.seq <- sample(c(rep('b', m), rep('w', n)))
#assign scores for each stage
results.seq[results.seq=='b']<- -1
results.seq[results.seq=='w']<- +1
#We stop the process if any stage the number of black and white balls are equal.
for(k in 1:(m+n)){
if(sum(as.numeric(results.seq[k:(m+n)]))==0){
return(sum(as.numeric(results.seq[1:k])))
}
}
#otherwise we take all of the balls out and return the score
return(sum(as.numeric(results.seq)))
}
Then let’s write a recursive function for the theory. As always think of the base case, either we get to a point when \(m\) and \(n\) are equal and we stop (we return nothing), or we have only white or black balls left. If all of the balls are white then we get a score of \(+n\), if all are black then we get a score of \(-m\). Moving backward, either we pick a white ball with probability \(p\), add +1 to our score and move to the previous step with one less white ball or we pick a black ball with probability \(q\), subtract 1 from our score and move to the previous step with one less black ball.
#' Theory function
#'
#' @param m the number of black balls
#' @param n the number of white balls
#'
#' @return the expected reward
#' @export
V <- function(m, n){
if(m==n){
return(0)
}
if(n==0){
return(-m)
}
if(m==0){
return(n)
}
#The probability of getting whites is n/(m+n) or p
#The probability of getting blacks is m/(m+n) or 1-p or q
p <- n/(n+m)
q <- 1 - p
#Either we pick a white one with probability p, add +1 to our score and move to the previous step with one less white ball or we pick a black ball with probability q, subtract 1 from our score and move to the previous step with one less black ball.
#On the other hand, if we quit, our expected reward would be zero at that point so we are weighing the option of quiting or continuing.
expected.reward <- max(0, q * (V(m-1, n) - 1) + p * (1 + V(m, n-1)))
return(expected.reward)
}
Compare simulations and theory
simulation.res.all <- c()
theory.res.all <- c()
num_simulations<-100
for(i in 5:30){
simulation.res <- c()
V(5, i)->theory.res
for(k in 1:num_simulations){
c(simulation.res, simulate_problem(5, i))->simulation.res
}
simulation.res.all <- c(simulation.res.all, mean(simulation.res))
theory.res.all <- c(theory.res.all, theory.res)
}
res.df <- data.frame(n = 5:30, simulations = simulation.res.all, theory = theory.res.all)
ggplot(res.df)+geom_point(aes(x=n, y=simulations), stroke=2
, color = "#D96098", shape=8)+
geom_line(aes(x=n, y=theory), size = 1,color = "#F7DBF0")+
labs(x = "Number of white balls (n)", y = "Expected rewards", title = "Black balls, m = 5") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "A")
Awesome, we see that simulations and theory match very well!
Goofspiel or game of pure strategy involves two players, one holding the diamonds and the other holding the hearts of a deck of cards. The spades are placed in the center and in each stage a spade is turned face up. The two players would then put two cards down and the one with the higher card value (with ace having a value of 1 and king a value of 13) wins the value of the faced up card in the center. The question is what strategy achieves this?
#' simulate a player playing the optimal strategy against a player who puts random cards down
#'
#' @return the scores of player 1 and player 2
#' @export
simulate_optimal_strategy <- function(){
center.cards <- sample(seq(1, 13, 1)) #Center cards drawn randomly
player.1.cards <- center.cards #optimal strategy - player 1 matches ceter cards
player.2.cards <- sample(seq(1, 13, 1)) #puts cards down randomly
player.1.scores <- 0;player.2.scores <- 0
for(i in 1:13){
if(player.1.cards[i] > player.2.cards[i]){
player.1.scores <- player.1.scores + center.cards[i]
}else if(player.1.cards[i] < player.2.cards[i]){
player.2.scores <- player.2.scores + center.cards[i]
}
}
return(list(player.1.scores, player.2.scores))
}
Simulate the game
#simulate the game 1000 times
num_simualtions <- 1000
player.1.scores <- c()
player.2.scores <- c()
for(i in 1:num_simulations){
res_lst <- simulate_optimal_strategy()
player.1.scores <- c(player.1.scores, res_lst[[1]])
player.2.scores <- c(player.2.scores, res_lst[[2]])
}
data.frame(score = player.1.scores)->player.1.scores
data.frame(score = player.2.scores)->player.2.scores
player.1.scores$player <- 'player1'
player.2.scores$player <- 'player2'
sim_res <- rbind(player.1.scores, player.2.scores)
To find the expected score for player I, we have to go over different cards and count the probability of the value of the current card exceeding that of player II. Then multiply it by the amount of the center card. For example, let’s start with the base case. We have two rounds with two cards ace and two. We can never win if we put an ace down (even if the other player puts their ace down, we both get a score of zero). If we put a two down then the chance of winning is \(\frac{1}{2}\) because either the other player puts a two down and we both get a score of 0 or they put an ace down and we win. In this case, it is obvious that we should reserve our two card for when two of spades comes up in the center.
By induction you can see that it is optimal to always match the strategy of the card in the center. The expected score for player I would be \(\sum_{i=1}^{N}i\frac{i-1}{N}\). If we replace \(N\) with the total number of cards, 13, we get an expected score of 56. Let’s see if it is consistent with our simulations of the game.
expected_winning <- 0
for(i in 1:13){
expected_winning <- expected_winning + i*((i-1))/13
}
ggplot(sim_res, aes(score, fill = player)) +
geom_density(alpha=0.4)+
geom_vline(xintercept = expected_winning, linetype="dashed", size=2)+
labs(x = "Score", y = "Density", title = "Game of pure strategy - 1k simulations") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
scale_fill_manual(values=c("#201E43", "#E3A5C7"))+
labs(tag = "A")
Example is taken from Bertsekas (Dynamic Programming and Optimal Control), Chapter 1, Example 1.2.1
Reference: Bertsekas, D. (2012). Dynamic programming and optimal control: Volume I (Vol. 4). Athena scientific.
Suppose you are playing against someone and you have the option to either play timidly or boldy. If you play boldly you win with probability \(p_w\) < 1/2. If you play timidly you draw with probability \(p_d\) > 1/2. If any of the players win they get a score of 1 while the other losing player gets a score of 0 and if there is a draw, then both players get a score of 0.5 for that match. You play two matches. If at the end of the second match you and the other player reach a draw, the game faces a sudden death meaning players play more and more matches until someone wins the game. Here we simulate and theoretically show the optimal strategy.
How do we find the optimal strategy? Let’s think of all of the possible scenarios:
We have four possible
scenarios, let’s calculate the probability of winning in each case:
CASE I: Timid - Timid
The only times we win under this strategy is if we draw the first two matches and win in the third match (sudden death happens) \(p_d^2 (p_w)\).
CASE II: Bold - Bold
Three possible routes we can win under this strategy. Luckiest scenario is we win both times then our probability of winning is \(p_w^2\). Alternatively, we win the first match, lose the others and then move to the third match and win that one. This case happens with probability \(p_w(1-p_w)p_w\). Similarly, we may lose the first match and win the second one then move to the third match and win that game with probability \((1-p_w)(p_w)(p_w)\). Overall the probability of winning in this case would be \(p_w^2 + 2(1-p_w)(p_w^2)\).
CASE III: Bold - Timid
We win the first match and draw the second with probability \(p_w(p_d)\). The other possible route of winning is if we win the first match, lose the second and win the third match with probability \(p_w(1-p_d)p_w\). The overall probability of winning in this case would be \(p_w(p_d)+p_w(1-p_d)p_w\).
CASE IV: Timid - Bold
We draw the first match and win the second with probability \(p_d(p_w)\). Lose the first match, win the second match and proceed to win the third match with probability \((1-p_d)(p_w)p_w\). The overall probability of winning in this case is \(p_d(p_w) + (1-p_d)(p_w)p_w\).
Let’s compare the strategies:
timid.timid <- function(p_d, p_w) return(p_d^2 * (p_w))
bold.bold <- function(p_d, p_w) return(p_w^2*(3-2*p_w))
timid.bold <- function(p_d, p_w) return(p_w*p_d + p_w^2*(1-p_d))
timid.timid.vector <- c()
bold.bold.vector <- c()
timid.bold.vector <- c()
p_d <- 0.9
for(p_w in seq(0.2, 0.5, 0.05)){
c(timid.timid.vector, timid.timid(p_d, p_w)) -> timid.timid.vector
c(bold.bold.vector, bold.bold(p_d, p_w))-> bold.bold.vector
c(timid.bold.vector, timid.bold(p_d, p_w))-> timid.bold.vector
}
win_prob_df <- data.frame(p_w = seq(0.2, 0.5, 0.05), timid_timid = timid.timid.vector,
bold_bold = bold.bold.vector,
timid_bold = timid.bold.vector)
In the book it is mentioned that the Timid-Timid strategy is always dominated by the others. Here, we show that although it will always be dominated by either the Bold-Timid and Timid-Bold strategies, it will not always be dominated by the Bold-Bold strategy.
Assume $p_d = $ 0.9. In that case the Timid-Timid strategy will be equally good as the Bold-Bold strategy when \(p_d^2 = p_w(3-2(p_w))\). If we plug in \(p_d\) in the equation, we get \(p_w\) equal to 0.35313730334 as one of the roots. We can see that whenever \(p_w\) is less than this value when \(p_d\) is 0.9 it is actually better to play timidly than boldly if we had the chance to decide between either playing completely timidly or boldly.
p_w_intersections <- c()
for(p_d in seq(0.55, 0.95, 0.05)){
f <- function(p_w) {
return(bold.bold(p_d, p_w) - timid.bold(p_d, p_w))
}
p_w_intersections <- c(p_w_intersections, uniroot(f, lower = 0.1, upper = 0.9)$root)
}
And we see that it is always optimal to play with the Timid-Bold strategy as opposed to the Bold-Bold strategy when \(2(p_w)\) is less than \(p_d\).
ggplot(win_prob_df)+
geom_rect(aes(xmin=0.2, xmax=0.35313730334, ymin=0.1, ymax=0.5), fill="#F1D3CE")+
geom_rect(aes(xmin=0.35313730334, xmax=0.9/2, ymin=0.1, ymax=0.5), fill="#D1E9F6")+
geom_rect(aes(xmin=0.9/2, xmax=0.5, ymin=0.1, ymax=0.5), fill="#C75B7A")+
geom_line(aes(x=p_w, y=timid_timid, colour = "Timid-Timid"), size = 2)+
geom_line(aes(x=p_w, y=bold_bold, colour = "Bold-Bold"), size = 2)+
geom_line(aes(x=p_w, y=timid_bold, colour = "Timid-Bold"), size = 2)+
geom_vline(xintercept = 0.35313730334, linetype='dashed')+
geom_vline(xintercept = 0.9/2, linetype='dashed')+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
scale_color_manual(values = c("Timid-Timid" = '#5F6F65', "Bold-Bold" = "#522258", "Timid-Bold" = "#6482AD"))+
labs(x="p_w", y="Win probability", tag="A")
First, let’s start by defining functions for timid and bold play:
#' Function that returns the scores of both players when player 1 plays timidly
#'
#' @param player.1.scores player #1 score
#' @param player.2.scores player #2 score
#' @param p_d probability of a draw on timid strategy
#'
#' @return the score of both players at the end of a match when player 1 plays timidly
#' @export
timid.play <- function(player.1.scores, player.2.scores, p_d){
draw <- rbinom(1, 1, p_d)
if(draw == 1){
player.1.scores <- player.1.scores + 0.5
player.2.scores <- player.2.scores + 0.5
}else{
player.1.scores <- player.1.scores + 0
player.2.scores <- player.2.scores + 1
}
return(list(player.1.scores, player.2.scores))
}
#' Function that returns the scores of both players when player 1 plays boldly
#'
#' @param player.1.scores player #1 score
#' @param player.2.scores player #2 score
#' @param p_w probability of a win on bold strategy
#'
#' @return the score of both players at the end of a match when player 1 plays boldly
#' @export
bold.play <- function(player.1.scores, player.2.scores, p_w){
win <- rbinom(1, 1, p_w)
if(win == 1){
player.1.scores <- player.1.scores + 1
player.2.scores <- player.2.scores + 0
}else{
player.1.scores <- player.1.scores + 0
player.2.scores <- player.2.scores + 1
}
return(list(player.1.scores, player.2.scores))
}
#' Simulate the chess match
#'
#' @param p_d the probability of a draw under timid strategy
#' @param p_w the probability of a win under bold strategy
#'
#' @return whether player 1 wins or not
#' @export
simulate.chess.game <- function(p_d, p_w){
player.1.scores <- 0
player.2.scores <- 0
game.stage <- 1
sudden.death <- FALSE
while(game.stage < 3 || sudden.death == TRUE){
if(player.1.scores > player.2.scores){
score.lst <- timid.play(player.1.scores, player.2.scores, p_d)
score.lst[[1]]->player.1.scores
score.lst[[2]]->player.2.scores
}else{
score.lst <- bold.play(player.1.scores, player.2.scores, p_w)
score.lst[[1]]->player.1.scores
score.lst[[2]]->player.2.scores
}
if(game.stage >= 2 && (player.1.scores == player.2.scores)){
sudden.death <- TRUE
}else{
sudden.death <- FALSE
}
game.stage <- game.stage + 1
}
player.1.win <- FALSE
if(player.1.scores > player.2.scores){
player.1.win <- TRUE
}
return(player.1.win)
}
Now let’s test this with the parameters mentioned in the book, with \(p_w =\) 0.45 and \(p_d=\) 0.9 we should be getting a win probability of 0.53.
p_w <- 0.45
p_d <- 0.9
win.sum <- 0
num_simulations <- 10000
for(k in 1:num_simulations){
simulate.chess.game(p_d, p_w) + win.sum -> win.sum
}
print(win.sum/num_simulations)
## [1] 0.5382
And that is exactly what we get!
Let’s think of this problem theoretically. We play the first game boldly and there are three possible scenarios:
If we win in the first match and draw in the next match we have won and the game stops this happens with probability \(p_w(p_d)\).
We win in the first match and lose in the second match then we play bold for the third match and win with probability \(p_w(1-p_d)(p_w)\).
#Define theoretical winning probability function:
win_probability_theory <- function(p_d, p_w) p_w*((1-p_w)*p_w + p_w*(1-p_d))+ p_w*p_d
#Let's compare theory and simulations for different parameter sets
sim.mat<-matrix(0, nrow=length(seq(0.2, 0.45, 0.05)), ncol=length(seq(0.7, 0.95, 0.05)))
theory.mat<-matrix(0, nrow=length(seq(0.2, 0.45, 0.05)), ncol=length(seq(0.7, 0.95, 0.05)))
p_w <- seq(0.2, 0.45, 0.05)
p_d <- seq(0.7, 0.95, 0.05)
for(i in seq_along(p_w)){
for(j in seq_along(p_d)){
for(k in 1:num_simulations){
sim.mat[i, j] <- sim.mat[i, j] + simulate.chess.game(p_d[j], p_w[i])
}
theory.mat[i, j] <- win_probability_theory(p_d[j], p_w[i])
}
}
sim.mat/num_simulations -> sim.mat
rownames(sim.mat) <- paste("p_w:",p_w)
colnames(sim.mat) <- paste("p_d:", p_d)
rownames(theory.mat) <- paste("p_w:", p_w)
colnames(theory.mat) <- paste("p_d:", p_d)
library(pheatmap)
pheatmap(sim.mat, display_numbers = T,
color = colorRampPalette(c('white','#522258'))(100),
cluster_rows = FALSE, cluster_cols = FALSE, fontsize_number = 15)
pheatmap(theory.mat, display_numbers = T,
color = colorRampPalette(c('white','#522258'))(100),
cluster_rows = FALSE, cluster_cols = FALSE, fontsize_number = 15)
The following problem is taken from Arthur, 1989.
Reference: Arthur, W. B. (1989). Competing technologies, increasing returns, and lock-in by historical events. The economic journal, 99(394), 116-131.
Suppose we have two technologies, \(A\) and \(B\) which are competing for adoption by a market consisting of two types of agents, \(R\), and \(S\). \(R\) agents have a preference for the \(A\) technology and \(S\) agents have a preference for the \(B\) technology due to political reasons. We start with a very basic example:
In the most simple problem, at each time point one agent enters the
systems and makes a decision to adopt either strategy. \(R\) agents have a strong preference for
\(A\). Likewise \(S\) agents have a strong preference for the
\(B\) technology. At each time point,
the expected return depends on the type of agent that makes the decision
and the previous adoptions. The number of previous \(A\) adoptions is referred to as \(n_A\) and the number of previous \(B\) technology adoptions is referred to as
\(n_B\) (\(n_A + n_B = n\)).
Depending on whether an \(R\) or \(S\) agent comes up with choose the return
based on the following payoff matrix:
Agent Type | Technology A | Technology B |
---|---|---|
R agent | \(a_R + r n_A\) | \(b_R + r n_B\) |
S agent | \(a_S + s n_A\) | \(b_S + s n_B\) |
To account for the preference for \(A\) or \(B\), we consider \(a_R > b_R\) and \(a_s < b_s\).
In the simple case we consider here, \(R\) agents always adopt technology \(A\) unless if technology \(B\) becomes widely adopted, increases in number and it would result in a higher payoff if \(R\) agents switch their strategy. Likewise, \(S\) agents always adopt technology \(B\) unless if technology \(A\) becomes widely adopted, increases in number and it would result in a higher payoff if \(S\) agents switch their strategy. We define the threshold for switching below:
find.delta_R <- function(b_R, a_R, r) return (b_R - a_R)/r
find.delta_S <- function(b_S, a_S, s) return (b_S - a_S)/s
Define the simulation function
#' Simulate the problem
#'
#' @param stages the number of stages
#' @param a_R the return for agent R adopting technology A
#' @param a_S the return for agent S adopting technology A
#' @param b_R the return for agent R adopting technology B
#' @param b_S the return for agent S adopting technology B
#' @param r the return for agent R based on previous adoptions
#' @param s the return for agent S based on previous adoptions
#'
#' @return a list containing the total return, number of A adoptions, number of B adoptions
#' @export
simulate.problem <- function(stages, a_R, a_S, b_R, b_S, r, s){
#initialize parameters
n_A <- c(0) #number of A adoptions
n_B <- c(0) #number of B adoptions
returns <- c(0) #return
delta_R <- find.delta_R(b_R,a_R,r)
delta_S <- find.delta_S(b_S,a_S,s)
if(r<0 && s<0){
condition <- "diminishing"
}else if(r>0 && s>0){
condition <- "increasing"
}else if(r==0 && s==0){
condition <- "constant"
}else{
return("r and s signs should match.")
}
for(i in 1:stages){
d_n <- n_A[length(n_A)] - n_B[length(n_B)]
if(condition == "diminishing"){
delta_R <- -1*delta_R
delta_S <- -1*delta_S
condition.1 <- d_n < delta_R
condition.2 <- d_n > delta_S
}else if(condition == "increasing"){
condition.1 <- d_n >= delta_R
condition.2 <- d_n <= delta_S
}else{
condition.1 <- TRUE
condition.2 <- TRUE
}
p <- rbinom(1, 1, 0.5) #throw a coin, if we get tails then an R agent comes up
#otherwise an S agent comes up
if(p == 0){
if(condition.1){ #check if we have reached the threshold for switching
returns <- c(returns, a_R + r*n_A[length(n_A)])
n_A <- c(n_A, n_A[length(n_A)] + 1)
n_B <- c(n_B, n_B[length(n_B)])
}else{
returns <- c(returns, b_R + r*n_B[length(n_B)])
n_A <- c(n_A, n_A[length(n_A)])
n_B <- c(n_B, n_B[length(n_B)] + 1)
}
}else{
if(condition.2){
returns <- c(returns, b_S + s*n_B[length(n_B)])
n_A <- c(n_A, n_A[length(n_A)])
n_B <- c(n_B, n_B[length(n_B)] + 1)
}else{
returns <- c(returns, a_S + s*n_A[length(n_A)])
n_A <- c(n_A, n_A[length(n_A)] + 1)
n_B <- c(n_B, n_B[length(n_B)])
}
}
}
return(list(returns, n_A, n_B))
}
The increasing return case
#Define parameters
a_R <- 10
a_S <- 6
b_R <- 4
b_S <- 12
r <- 1
s <- 3
returns.mat<-matrix(nrow=50, ncol=10)
d.mat<-matrix(nrow=50, ncol=10)
#simulate the problem
for(i in 1:10){
simulate.problem(49, a_R, a_S, b_R, b_S, r, s)->sims
sims[[1]] -> returns
sims[[2]] - sims[[3]] -> d_n
returns.mat[,i]<-returns
d.mat[,i] <- d_n
}
as.data.frame(returns.mat)->returns.df
as.data.frame(d.mat)->d.df
colnames(returns.df) <- as.character(as.roman(1:10))
colnames(d.df) <- as.character(as.roman(1:10))
returns.df$stage <- 1:50
d.df$stage <- 1:50
delta_R <- find.delta_R(b_R,a_R,r)
delta_S <- find.delta_S(b_S,a_S,s)
ggplot(data = d.df)+
geom_point(aes(x=stage, y=I), shape=0, color = "#3AA6B9")+
geom_point(aes(x=stage, y=II), shape=1, color = "#254336")+
geom_point(aes(x=stage, y=III), shape=2, color = "#32012F")+
geom_point(aes(x=stage, y=IV), shape=3, color = "#8E3E63")+
geom_point(aes(x=stage, y=V), shape=4, color = "#A91D3A")+
geom_point(aes(x=stage, y=VI), shape=5, color = "#A79277")+
geom_point(aes(x=stage, y=VII), shape=6, color = "#481E14")+
geom_point(aes(x=stage, y=VIII), shape=7, color = "#2C7865")+
geom_point(aes(x=stage, y=IX), shape=8, color = "#222831")+
geom_point(aes(x=stage, y=X), shape=9, color = "#E1AFD1")+
geom_hline(yintercept = delta_R, linetype = 'dashed')+
geom_hline(yintercept = delta_S, linetype = 'dashed')+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+labs(tag="A", y = TeX("$d_n$"))
ggplot(data = returns.df)+
geom_point(aes(x=stage, y=I), shape=0, color = "#3AA6B9")+
geom_point(aes(x=stage, y=II), shape=1, color = "#254336")+
geom_point(aes(x=stage, y=III), shape=2, color = "#32012F")+
geom_point(aes(x=stage, y=IV), shape=3, color = "#8E3E63")+
geom_point(aes(x=stage, y=V), shape=4, color = "#A91D3A")+
geom_point(aes(x=stage, y=VI), shape=5, color = "#A79277")+
geom_point(aes(x=stage, y=VII), shape=6, color = "#481E14")+
geom_point(aes(x=stage, y=VIII), shape=7, color = "#2C7865")+
geom_point(aes(x=stage, y=IX), shape=8, color = "#222831")+
geom_point(aes(x=stage, y=X), shape=9, color = "#E1AFD1")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+labs(tag="B", y="returns")
As you see we picked \(r\) to be less than \(s\) meaning the payoff of the adoption of the B technology is three times higher than the adoption of Technology A. However, we can see in simulations that it is possible for the market to be locked in with the technology with the lower payoff. It is evident in the panel B plot that the simulation cases with the highest payoff have absorption to Technology B (cases, red plus and pink diamond plus).
The diminishing return case
#Define parameters
a_R <- -4
a_S <- -12
b_R <- -10
b_S <- -6
r <- -3
s <- -1
returns.mat<-matrix(nrow=50, ncol=10)
d.mat<-matrix(nrow=50, ncol=10)
#simulate the problem
for(i in 1:10){
simulate.problem(49, a_R, a_S, b_R, b_S, r, s)->sims
sims[[1]] -> returns
sims[[2]] - sims[[3]] -> d_n
returns.mat[,i]<-returns
d.mat[,i] <- d_n
}
as.data.frame(returns.mat)->returns.df
as.data.frame(d.mat)->d.df
colnames(returns.df) <- as.character(as.roman(1:10))
colnames(d.df) <- as.character(as.roman(1:10))
returns.df$stage <- 1:50
d.df$stage <- 1:50
delta_R <- find.delta_R(b_R,a_R,r)
delta_S <- find.delta_S(b_S,a_S,s)
ggplot(data = d.df)+
geom_point(aes(x=stage, y=I), shape=0, color = "#3AA6B9")+
geom_point(aes(x=stage, y=II), shape=1, color = "#254336")+
geom_point(aes(x=stage, y=III), shape=2, color = "#32012F")+
geom_point(aes(x=stage, y=IV), shape=3, color = "#8E3E63")+
geom_point(aes(x=stage, y=V), shape=4, color = "#A91D3A")+
geom_point(aes(x=stage, y=VI), shape=5, color = "#A79277")+
geom_point(aes(x=stage, y=VII), shape=6, color = "#481E14")+
geom_point(aes(x=stage, y=VIII), shape=7, color = "#2C7865")+
geom_point(aes(x=stage, y=IX), shape=8, color = "#222831")+
geom_point(aes(x=stage, y=X), shape=9, color = "#E1AFD1")+
geom_hline(yintercept = delta_R, linetype = 'dashed')+
geom_hline(yintercept = delta_S, linetype = 'dashed')+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+labs(tag="A", y = TeX("$d_n$"))+ylim(c(-10, 10))
ggplot(data = returns.df)+
geom_point(aes(x=stage, y=I), shape=0, color = "#3AA6B9")+
geom_point(aes(x=stage, y=II), shape=1, color = "#254336")+
geom_point(aes(x=stage, y=III), shape=2, color = "#32012F")+
geom_point(aes(x=stage, y=IV), shape=3, color = "#8E3E63")+
geom_point(aes(x=stage, y=V), shape=4, color = "#A91D3A")+
geom_point(aes(x=stage, y=VI), shape=5, color = "#A79277")+
geom_point(aes(x=stage, y=VII), shape=6, color = "#481E14")+
geom_point(aes(x=stage, y=VIII), shape=7, color = "#2C7865")+
geom_point(aes(x=stage, y=IX), shape=8, color = "#222831")+
geom_point(aes(x=stage, y=X), shape=9, color = "#E1AFD1")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+labs(tag="B", y="returns")
The constant return case
#Define parameters
a_R <- 5
a_S <- 3
b_R <- 2
b_S <- 6
r <- 0
s <- 0
returns.mat<-matrix(nrow=10000, ncol=10)
d.mat<-matrix(nrow=10000, ncol=10)
#simulate the problem
for(i in 1:10){
simulate.problem(9999, a_R, a_S, b_R, b_S, r, s)->sims
sims[[1]] -> returns
sims[[2]] - sims[[3]] -> d_n
returns.mat[,i]<-returns
d.mat[,i] <- d_n
}
as.data.frame(returns.mat)->returns.df
as.data.frame(d.mat)->d.df
colnames(returns.df) <- as.character(as.roman(1:10))
colnames(d.df) <- as.character(as.roman(1:10))
returns.df$stage <- 1:10000
d.df$stage <- 1:10000
delta_R <- find.delta_R(b_R,a_R,r)
delta_S <- find.delta_S(b_S,a_S,s)
#We track the limit of $d_n/n$ as $n$ goes to infinity
# Apply the division using the generated Roman numerals
for (col in as.character(as.roman(1:10))) {
d.df[[col]] <- d.df[[col]] / d.df$stage
}
ggplot(data = d.df)+
geom_point(aes(x=stage, y=I), shape=0, color = "#3AA6B9")+
geom_point(aes(x=stage, y=II), shape=1, color = "#254336")+
geom_point(aes(x=stage, y=III), shape=2, color = "#32012F")+
geom_point(aes(x=stage, y=IV), shape=3, color = "#8E3E63")+
geom_point(aes(x=stage, y=V), shape=4, color = "#A91D3A")+
geom_point(aes(x=stage, y=VI), shape=5, color = "#A79277")+
geom_point(aes(x=stage, y=VII), shape=6, color = "#481E14")+
geom_point(aes(x=stage, y=VIII), shape=7, color = "#2C7865")+
geom_point(aes(x=stage, y=IX), shape=8, color = "#222831")+
geom_point(aes(x=stage, y=X), shape=9, color = "#E1AFD1")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+labs(tag="A", y = TeX("$\\frac{d_n}{n}$"))
To be continued…
In this example we consider an agent interacting with n slot machines, each with a different, unknown probability of winning. The agent’s objective is to identify and play the slot machine that maximizes the chances of winning over time.
We first use a Thompson sampling approach. Thompson sampling is a Bayesian approach for optimizing the probability of selecting the best machine. Suppose we begin with three slot machines and we have no prior knowledge of their individual probabilities of success. Since we have no background information, we assign each slot machine a prior distribution of \(\beta(1, 1)\) (equivalent to a uniform distribution). At each stage, we draw a random sample from the \(\beta\) distribution of each machine and compare the results. We then select the machine with the highest sampled value for play.
If the chosen machine results in a win, we update its distribution to \(\beta(2, 1)\). Otherwise, if the machine results in a loss, we adjust its distribution to \(\beta(1, 2)\). We then repeat this process over N trials, and we track the number of wins and losses for each machine.
library(tidyr)
#' Simulate the MAB problem over a finite horizon using Thompson sampling
#'
#' @param N the number of trials
#' @param p the actual probability of winning by playing each arm
#'
#' @return a list containing played arms, winnings per arm, losses per arm, and simulations
#' @export
simulate.MAB.thompson.sampling <- function(N, p){
simulations <- sapply(p, function(prob) rbinom(N, 1, prob)) #You win or lose based
#on a Bernoulli distribution
num.arms <- length(p) #Number of bandits
#Store number of winnings and losses per bandit
winnings <- rep(0, num.arms)
losses <- rep(0, num.arms)
played.arms <- c() #Store which arm you have played at each stage
#Doing this over a finite horizon, n stages
for(n in 1:N){
hold.max.beta <- 0
for(arm in 1:num.arms){
beta <- rbeta(1, shape1 = winnings[arm] + 1, shape2 = losses[arm] + 1)
if(beta > hold.max.beta){
track.arm <- arm
beta -> hold.max.beta
}
}
played.arms <- c(played.arms, track.arm)
if(simulations[n, track.arm] == 1){
winnings[track.arm] <- winnings[track.arm] + 1
}else{
losses[track.arm] <- losses[track.arm] + 1
}
}
return(list(played.arms, winnings, losses, simulations))
}
Simulate the game
Let’s start with a simple scenario where we have three slot machines that we have never played before. We don’t know which of the slot machines has a higher probability of winning. We denote the probability of winning to be \(p_1\), \(p_2\), and \(p_3\)
In this version we are trying the bandits over a finite horizon with no discounting
N <- 1000 #The number of trials
p <- c(0.55, 0.7, 0.45) #The probability of winning for different bandits
simulate.MAB.thompson.sampling(N, p) -> results.lst
results.lst[[1]]->played.arms
results.lst[[2]]->winnings
results.lst[[3]]->losses
results.lst[[4]]->simulations
num.arms <- length(p)
#Matrices for tracking cumulative plays and cumulative wins for each arm
cumulative.plays <- matrix(0, nrow=length(played.arms), ncol=num.arms)
cumulative.winnings <- matrix(0, nrow = nrow(simulations), ncol = num.arms)
#Iterate over arms and find cumulative wins and cumulative plays
for (i in 1:length(played.arms)) {
if (i > 1) {
cumulative.plays[i, ] <- cumulative.plays[i-1, ]
cumulative.winnings[i, ] <- cumulative.winnings[i - 1, ]
}
arm_played <- played.arms[i]
cumulative.plays[i, played.arms[i]] <- cumulative.plays[i, played.arms[i]] + 1
cumulative.winnings[i, arm_played] <- cumulative.winnings[i, arm_played] + simulations[i, arm_played]
}
#Convert matrix to dataframe
cum.plays.df <- as.data.frame(cumulative.plays)
colnames(cum.plays.df) <- paste0("Arm ", 1:num.arms)
cum.plays.df$Stage <- 1:nrow(cum.plays.df)
#Reshape dataframe to long format
cum.plays.long <- pivot_longer(cum.plays.df, cols = starts_with("Arm "),
names_to = "Arm", values_to = "CumulativePlays")
ggplot(cum.plays.long, aes(x = Stage, y = CumulativePlays, color = Arm)) +
geom_line(size = 1) +
labs(title = "Thompson sampling",
x = "Stage",
y = "Cumulative plays") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "A")+
scale_color_manual(values = c("Arm 1" = "#17153B", "Arm 2" = "#C8ACD6", "Arm 3" = "#6A9C89"))
cum.winnings.df <- as.data.frame(cumulative.winnings)
colnames(cum.winnings.df) <- paste0("Arm ", 1:num.arms)
cum.winnings.df$Stage <- 1:nrow(cum.winnings.df)
#Reshape dataframe to long format
cum.winnings.long <- pivot_longer(cum.winnings.df, cols = starts_with("Arm "),
names_to = "Arm", values_to = "CumulativeWinnings")
ggplot(cum.winnings.long, aes(x = Stage, y = CumulativeWinnings, color = Arm)) +
geom_line(size = 1) +
labs(title = "Thompson sampling",
x = "Stage", y = "Cumulative winnings") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "B")+
scale_color_manual(values = c("Arm 1" = "#17153B", "Arm 2" = "#C8ACD6", "Arm 3" = "#6A9C89"))
Now, let us try another method to find the solution. Through the UCB method we aim to choose the arm with the highest upper confidence bound at each stage. That means at each stage we calculate the confidence interval for each arm and choose the arm with the highest upper confidence bound.
#' Simulate the MAB problem over a finite horizon using Upper Confidence Bound (UCB)
#'
#' @param N the number of trials
#' @param p the actual probability of winning by playing each arm
#'
#' @return a list containing played arms, winnings per arm, losses per arm, and simulations
#' @export
simulate.MAB.upper.confidence.bound <- function(N, p){
simulations <- sapply(p, function(prob) rbinom(N, 1, prob)) #You win or lose based
#on a Bernoulli distribution
num.arms <- length(p) #Number of bandits
#Store number of winnings and losses per bandit
winnings <- rep(0, num.arms)
losses <- rep(0, num.arms)
played.arms <- c() #Store which arm you have played at each stage
#Doing this over a finite horizon, n stages
for(n in 1:N){
hold.max.upper.bound <- -Inf
for(arm in 1:num.arms){ #Iterate over arms
if(sum(played.arms==arm)>0){
reward <- winnings[arm]/(sum(played.arms==arm))
delta <- sqrt(2 * log(n) / sum(played.arms==arm))
upper.bound <- reward + delta
}else{upper.bound <- Inf}
if(upper.bound > hold.max.upper.bound){
track.arm <- arm
upper.bound -> hold.max.upper.bound
}
}
played.arms<- c(played.arms, track.arm)
if(simulations[n, track.arm] == 1){
winnings[track.arm] <- winnings[track.arm] + 1
}else{
losses[track.arm] <- losses[track.arm] + 1
}
}
return(list(played.arms, winnings, losses, simulations))
}
Simulate the problem using UCB
N <- 1000 #The number of trials
p <- c(0.55, 0.7, 0.45) #The probability of winning for different bandits
simulate.MAB.upper.confidence.bound(N, p) ->results.lst
results.lst[[1]]->played.arms
results.lst[[2]]->winnings
results.lst[[3]]->losses
results.lst[[4]]->simulations
num.arms <- length(p)
#Matrices for tracking cumulative plays and cumulative wins for each arm
cumulative.plays <- matrix(0, nrow=length(played.arms), ncol=num.arms)
cumulative.winnings <- matrix(0, nrow = nrow(simulations), ncol = num.arms)
#Iterate over arms and find cumulative wins and cumulative plays
for (i in 1:length(played.arms)) {
if (i > 1) {
cumulative.plays[i, ] <- cumulative.plays[i-1, ]
cumulative.winnings[i, ] <- cumulative.winnings[i - 1, ]
}
arm_played <- played.arms[i]
cumulative.plays[i, played.arms[i]] <- cumulative.plays[i, played.arms[i]] + 1
cumulative.winnings[i, arm_played] <- cumulative.winnings[i, arm_played] + simulations[i, arm_played]
}
#Convert matrix to dataframe
cum.plays.df <- as.data.frame(cumulative.plays)
colnames(cum.plays.df) <- paste0("Arm ", 1:num.arms)
cum.plays.df$Stage <- 1:nrow(cum.plays.df)
#Reshape dataframe to long format
cum.plays.long <- pivot_longer(cum.plays.df, cols = starts_with("Arm "),
names_to = "Arm", values_to = "CumulativePlays")
ggplot(cum.plays.long, aes(x = Stage, y = CumulativePlays, color = Arm)) +
geom_line(size = 1) +
labs(title = "Upper Confidence Bound",
x = "Stage",
y = "Cumulative plays") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "A")+
scale_color_manual(values = c("Arm 1" = "#17153B", "Arm 2" = "#C8ACD6", "Arm 3" = "#6A9C89"))
cum.winnings.df <- as.data.frame(cumulative.winnings)
colnames(cum.winnings.df) <- paste0("Arm ", 1:num.arms)
cum.winnings.df$Stage <- 1:nrow(cum.winnings.df)
#Reshape dataframe to long format
cum.winnings.long <- pivot_longer(cum.winnings.df, cols = starts_with("Arm "),
names_to = "Arm", values_to = "CumulativeWinnings")
ggplot(cum.winnings.long, aes(x = Stage, y = CumulativeWinnings, color = Arm)) +
geom_line(size = 1) +
labs(title = "Upper Confidence Bound",
x = "Stage", y = "Cumulative winnings") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "B")+
scale_color_manual(values = c("Arm 1" = "#17153B", "Arm 2" = "#C8ACD6", "Arm 3" = "#6A9C89"))
Using the \(\epsilon\)-greedy approach, we aim to find the best arm by balancing exploration and exploitation. We first pick an \(\epsilon\) ranging from 0 to 1 and at each stage we compare a random number drawn from a uniform distribution from 0 to 1 and compare it to \(\epsilon\). If it is smaller than \(\epsilon\) then we pick an arm randomly otherwise we pick the arm with the highest observed probability of winning. This means that an \(\epsilon\) of 1 corresponds to complete randomness.
#' Simulate the MAB problem over a finite horizon using epsilon-greedy
#'
#' @param N the number of trials
#' @param p the actual probability of winning by playing each arm
#' @param eps epsilon value
#'
#' @return a list containing played arms, winnings per arm, losses per arm, and simulations
#' @export
simulate.MAB.epsilon.greedy <- function(N, p, eps){
simulations <- sapply(p, function(prob) rbinom(N, 1, prob)) #You win or lose based
#on a Bernoulli distribution
num.arms <- length(p) #Number of bandits
#Store number of winnings and losses per bandit
winnings <- rep(0, num.arms)
losses <- rep(0, num.arms)
played.arms <- c() #Store which arm you have played at each stage
#Doing this over a finite horizon, n stages
for(n in 1:N){
rand.num <- runif(1)
if(rand.num < eps){
track.arm <- sample(num.arms)[1]
}else{
observed.prob.win <- winnings/(winnings+losses)
observed.prob.win[is.na(observed.prob.win)]<- 0
track.arm <- which.max(observed.prob.win)
}
played.arms <- c(played.arms, track.arm)
if(simulations[n, track.arm] == 1){
winnings[track.arm] <- winnings[track.arm] + 1
}else{
losses[track.arm] <- losses[track.arm] + 1
}
}
return(list(played.arms, winnings, losses, simulations))
}
Simulate the problem using epsilon-greedy
N <- 1000 #The number of trials
p <- c(0.55, 0.7, 0.45) #The probability of winning for different bandits
simulate.MAB.epsilon.greedy(N, p, 0.2) -> results.lst
results.lst[[1]]->played.arms
results.lst[[2]]->winnings
results.lst[[3]]->losses
results.lst[[4]]->simulations
num.arms <- length(p)
#Matrices for tracking cumulative plays and cumulative wins for each arm
cumulative.plays <- matrix(0, nrow=length(played.arms), ncol=num.arms)
cumulative.winnings <- matrix(0, nrow = nrow(simulations), ncol = num.arms)
#Iterate over arms and find cumulative wins and cumulative plays
for (i in 1:length(played.arms)) {
if (i > 1) {
cumulative.plays[i, ] <- cumulative.plays[i-1, ]
cumulative.winnings[i, ] <- cumulative.winnings[i - 1, ]
}
arm_played <- played.arms[i]
cumulative.plays[i, played.arms[i]] <- cumulative.plays[i, played.arms[i]] + 1
cumulative.winnings[i, arm_played] <- cumulative.winnings[i, arm_played] + simulations[i, arm_played]
}
#Convert matrix to dataframe
cum.plays.df <- as.data.frame(cumulative.plays)
colnames(cum.plays.df) <- paste0("Arm ", 1:num.arms)
cum.plays.df$Stage <- 1:nrow(cum.plays.df)
#Reshape dataframe to long format
cum.plays.long <- pivot_longer(cum.plays.df, cols = starts_with("Arm "),
names_to = "Arm", values_to = "CumulativePlays")
ggplot(cum.plays.long, aes(x = Stage, y = CumulativePlays, color = Arm)) +
geom_line(size = 1) +
labs(title = "Epsilon-greedy",
x = "Stage",
y = "Cumulative plays") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "A")+
scale_color_manual(values = c("Arm 1" = "#17153B", "Arm 2" = "#C8ACD6", "Arm 3" = "#6A9C89"))
cum.winnings.df <- as.data.frame(cumulative.winnings)
colnames(cum.winnings.df) <- paste0("Arm ", 1:num.arms)
cum.winnings.df$Stage <- 1:nrow(cum.winnings.df)
#Reshape dataframe to long format
cum.winnings.long <- pivot_longer(cum.winnings.df, cols = starts_with("Arm "),
names_to = "Arm", values_to = "CumulativeWinnings")
ggplot(cum.winnings.long, aes(x = Stage, y = CumulativeWinnings, color = Arm)) +
geom_line(size = 1) +
labs(title = "Epsilon-greedy",
x = "Stage", y = "Cumulative winnings") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "B")+
scale_color_manual(values = c("Arm 1" = "#17153B", "Arm 2" = "#C8ACD6", "Arm 3" = "#6A9C89"))
Let’s compare the run times of the algorithms:
num <- seq(10, 10000, 100) #change the number of stages from 10 to 10k
epsilon_greedy_times <- numeric(length(num))
thompson_sampling_times <- numeric(length(num))
ucb_times <- numeric(length(num))
for (i in seq_along(num)) {
n <- num[i]
#Epsilon-greedy
start.time <- Sys.time()
simulate.MAB.epsilon.greedy(n, p, 0.2)
end.time <- Sys.time()
epsilon_greedy_times[i] <- as.numeric(difftime(end.time, start.time, units = "secs"))
#Thompson sampling
start.time <- Sys.time()
simulate.MAB.thompson.sampling(n, p)
end.time <- Sys.time()
thompson_sampling_times[i] <- as.numeric(difftime(end.time, start.time, units = "secs"))
#Upper Confidence Bound (UCB)
start.time <- Sys.time()
simulate.MAB.upper.confidence.bound(n, p)
end.time <- Sys.time()
ucb_times[i] <- as.numeric(difftime(end.time, start.time, units = "secs"))
}
results <- data.frame(
n = num,
Epsilon_Greedy = epsilon_greedy_times,
Thompson_Sampling = thompson_sampling_times,
UCB = ucb_times
)
ggplot(results, aes(x = n)) +
geom_line(aes(y = Epsilon_Greedy, color = "Epsilon Greedy"), size=1.25) +
geom_line(aes(y = Thompson_Sampling, color = "Thompson Sampling"), size=1.25) +
geom_line(aes(y = UCB, color = "UCB"), size=1.25) +
labs(title = "Algorithm Run Times", x = "Stages", y = "Time (seconds)") +
theme_minimal() +
scale_color_manual(name = "Algorithm", values = c("Epsilon Greedy" = "#674188",
"Thompson Sampling" = "#E2BFD9",
"UCB" = "#921A40"))+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)
Problem is taken from Richard Bellman’s book on stochastic dynamic programming.
Suppose there are two gold mines, Anaconda (\(A\)) and Bonanza (\(B\)) and each has a certain amount of gold \(x\) and \(y\). Now suppose that we are given the option to mine gold from either mine \(A\) or mine \(B\) and we know that we have a probability \(p_1\) of succeeding if mining from mine \(A\) to extract \(r_1\) fraction of the gold and a probability \(p_2\) of succeeding if mining from mine \(B\) to extract an \(r_2\) fraction of the gold. At each stage if we fail to extract gold, our mining machine will be damaged beyond repair.
To understand the optimal strategy, let us start with the first stage. We know that the expected amount of gold is \(p_1(r_1)x\) if we choose to mine from mine \(A\) and \(p_2(r_2)y\) if we choose to mine from the second mine. If we have only one stage then we would choose the mine that maximizes the expected extracted gold.
\(V_1(x, y) = max[p_1(r_1)x, p_2(r_2)y]\)
Now if we were to extend to this to an \(N\)-stage problem then the Bellman equation would become:
\(V_N(x, y) = max[p_1(r_1(x) + V_{N-1}((1-r_1)x, y)), p_2(r_2(y) + V_{N-1}(x, (1-r_2)y))]\)
#' Bellman equation function
#'
#' @param N the number of stages
#' @param x the total gold at mine A
#' @param y the total gold at mine B
#' @param p the vector containing the probability of successful extraction of gold from either mine
#' @param r the vector containing the probability of fraction of gold mined from either mine
#'
#' @return the expected extracted gold
#' @export
V <- function(N, x, y, p, r){
if(N==1){
return(max(p[1]*r[1]*x, p[2]*r[2]*y))
}
return(max(p[1]*(r[1]*x + V(N-1, (1-r[1])*x, y, p, r)), p[2]*(r[2]*y + V(N-1, x, (1-r[2])*y, p, r))))
}
#' Simulation function for the gold mininig problem
#'
#' @param N the number of stages
#' @param x the total gold at mine A
#' @param y the total gold at mine B
#' @param p the vector containing the probability of successful extraction of gold from either mine
#' @param r the vector containing the probability of fraction of gold mined from either mine
#'
#' @return the simulated extracted gold
#' @export
simulate.problem <- function(N, x, y, p, r){
extracted.gold <- 0
for(n in 1:N){
if(x*p[1]*r[1]>y*p[2]*r[2]){
if(rbinom(1, 1, p[1])){
extracted.gold <- extracted.gold + r[1]*x
x <- x - r[1]*x
}else{
return(extracted.gold)
}
}else{
if(rbinom(1, 1, p[2])){
extracted.gold <- extracted.gold + r[2]*y
y <- y - r[2]*y
}else{
return(extracted.gold)
}
}
}
return(extracted.gold)
}
N <- 20
x <- 50
y <- 70
#p <- c(0.7, 0.8)
r <- c(0.5, 0.4)
num.simulations <- 1000
p1.greater.theory <- c()
p2.greater.theory <- c()
p1.greater.sim <- c()
p2.greater.sim <- c()
for(x in seq(50, 150, 5)){
p1.greater.theory <- c(p1.greater.theory, V(N, x, y, c(0.9, 0.6), r))
p1.greater.sim <- c(p1.greater.sim, mean(replicate(num.simulations, simulate.problem(N, x, y, c(0.9, 0.6), r))))
p2.greater.theory <- c(p2.greater.theory, V(N, x, y, c(0.6, 0.9), r))
p2.greater.sim <- c(p2.greater.sim, mean(replicate(num.simulations, simulate.problem(N, x, y, c(0.6, 0.9), r))))
}
results.df <- data.frame(x = seq(50, 150, 5), theory_p1_greater = p1.greater.theory, theory_p2_greater = p2.greater.theory, simulation_p1_greater = p1.greater.sim, simulation_p2_greater = p2.greater.sim)
ggplot(results.df)+
geom_line(aes(x=x, y=theory_p1_greater, color="p1=0.9, p2=0.6 theory"), size=1.25)+
geom_line(aes(x=x, y=theory_p2_greater, color="p1=0.6, p2=0.9 theory"), size=1.25)+
geom_point(aes(x=x, y=simulation_p1_greater, color="p1=0.9, p2=0.6 sim"), stroke=1.5, shape=8)+
geom_point(aes(x=x, y=simulation_p2_greater, color="p1=0.6, p2=0.9 sim"), stroke=1.5, shape=5)+
scale_color_manual(values=c("p1=0.9, p2=0.6 theory" = "#5C5470","p1=0.9, p2=0.6 sim"="#B9B4C7" , "p1=0.6, p2=0.9 sim" = "#629584", "p1=0.6, p2=0.9 theory" ="#387478"))+labs(title="Gold mining;\n N=20, y=70, r=[0.5, 0.4]", x="x", y="Extracted gold amount", colour="Theory/simulation")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)
The following problem is an R implementation of the machine replacement example from Sheldon Ross’s “Introduction to Dynamic Programming” book, Example 3.1.
Suppose we have a machine with three different states “Perfect” (state: 0), “Good” (state: 1), and “Bad” (state: 2). We have 20 time periods (20 months) and at the end of each month we can either replace the machine with a cost of \(R\) or keep the machine and pay some fees to maintain the machine.
Here, we consider a basic three state model.
Let’s consider the non-decreasing linear maintenance cost function \(C(i) = \alpha*i\) (Don’t confuse \(\alpha\) with the discount factor used later on).
#' Maintenance cost function
#'
#' @param i the state
#'
#' @return the cost at state i
#' @export
C <- function(i){
alpha <- 2
return(alpha*i)
}
#Define the probability transition matrix ($P$). The transition matrix
#defines the probability of transitioning from one state to another.
#The element at index $i, j$ denotes the probability of transitioning from state
#$i$ to state $j$.
P <- matrix(
c(0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.1, 0.2, 0.7),
nrow = 3, ncol = 3, byrow = TRUE
)
Let’s first define the expected theoretical cost function. We have two actions at each step, either we replace the machine or we don’t. If we replace the machine, the state of the machine switches to 0 and we pay a replacement cost. Otherwise the machine transitions to another state with the probabilities defined in the transition matrix \(P\) and we pay a maintanance cost depending on the state of the system \(C(i)\). The discount factor is added to take into account the decrease in the value of money overtime.
#' Expected cost function
#'
#' @param i state $i$
#' @param n number of
#' @param P probability transition matrix
#' @param R cost of replacing the machine
#' @param alpha discount factor
#'
#' @return the expected cost
#' @export
V <- function(i, n, P, R, alpha){
#boundary condition
if(n==1){
return(C(i))
}
sum <- 0
for(j in 0:(dim(P)[1]-1)){
sum <- sum + P[i+1, j+1] * V(j, n-1, P, R, alpha)
}
return(C(i) + min((R + alpha*V(0, n-1, P, R, alpha)), alpha*sum))
}
#Find $V_n(i)$ and plot for $\alpha$ = 0.3
df.lim <- data.frame(matrix(ncol = 3, nrow = 0))
colnames(df.lim)<-c("i", "ii", "iii")
k<-1
p<-1
for(state in 0:2){
k<-1
for(n in 1:11){
df.lim[k,p] <- V(state, n, P, 1, 0.3)
k<-k+1
}
p<-p+1
}
df.lim$n <- 1:11
ggplot(data=df.lim)+
geom_line(aes(x=n, y=i, colour = "State: 0"), size = 1.5)+
geom_line(aes(x=n, y=ii, colour = "State: 1"), size = 1.5)+
geom_line(aes(x=n, y=iii, colour = "State: 2"), size = 1.5)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) +
labs(tag = "A")+
scale_color_manual(values = c("State: 0" = "#B9B4C7", "State: 1" = "#5C5470", "State: 2" = "#352F44"))+
labs(colour = "Initial state", x = "Number of periods (n)", y="Expected cost")
#Save the results at the end of stage 11 as $V(i)$
df.lim <- df.lim[nrow(df.lim), 1:3]
\(\breve{\imath}\) is the stage at which we should replace the machine
#' Finding i bar
#'
#' @param df.lim the expected optimal cost for the system starting with state i, V(i)
#' @param P the probability transition matrix
#' @param R the cost of replacement
#' @param alpha the discount factor
#'
#' @return the optimal stage to replace
#' @export
find_i_bar <- function(df.lim, P, R, alpha){
hold.i.bar <- c()
A <- R + alpha * df.lim[1] #compare the expected cost for replacement (A)
#To the cost for maintance without replacing B
for(i in 0:2){
sum <- 0
for (j in 0:(dim(P)[1]-1)) {
sum <- sum + P[i+1, j+1] * df.lim[j+1]
}
B <- sum * alpha
hold.i.bar <- c(hold.i.bar, A<=B)
}
return(hold.i.bar)
}
# Find $\breve{\imath}$ for $\alpha$ = 0.3 and $R$ equals 1 with transition matrix
#defined above.
min(which(find_i_bar(df.lim, P, 1, 0.3) == TRUE))-1 -> i_bar
Now let’s simulate the problem and compare simulation with theory
#' Simulating the machine replacement model
#'
#' @param n the number of time periods
#' @param i the initial state of the system
#' @param i_bar the stage we replace the machine
#' @param P the probability transition matrix
#' @param R the cost of machine replacement
#' @param alpha the discount factor
#'
#' @return the total cost
#' @export
simulate.problem <- function(n, i, i_bar, P, R, alpha){
cost <- 0
state <- i
for(k in 1:n){ #iterate over time periods
if(state >= i_bar){ #see if we have reached the time period at which we replace the machine or not
cost_to_add <- R + C(state) #if we replace then we incur the replacement cost
#NOTE: because our C(i) function is equal to 2i then we don't incur any maintenance cost when we are at state 0, we may have to add the C(i) cost for other functions
state <- 0 #reset to state 0
}else{
cost_to_add <- C(state) #we incur maintenance cost
probabilities <- P[state+1,] #the system transitions to another state with the probabilities defined in the transition matrix
result <- sample(c(0, 1, 2), size = 1, prob = probabilities)
state <- result
}
cost <- cost + cost_to_add*alpha^(k-1) #take into account the discount factor
}
return(cost)
}
#Compare simulations and theory
theory.vector <- c()
num_simulations <- 1000
data.frame(matrix(ncol = 3, nrow = num_simulations)) -> hold.simulations.mat
for(state in 0:2){
hold.sims <- c()
for(k in 1:num_simulations){
hold.sims <- c(hold.sims, simulate.problem(11, state, i_bar, P, 1, 0.3))
}
hold.simulations.mat[,state+1] <- hold.sims
theory.vector <- c(theory.vector, V(state, 11, P, 1, 0.3))
}
colnames(hold.simulations.mat) <- c("i", "ii", "iii")
#Load libraries for melting the dataframe and showing LaTeX in ggplot
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(latex2exp)
melt(hold.simulations.mat, variable.name = "state") -> melted.simulations.mat
## No id variables; using all as measure variables
ggplot() +
geom_boxplot(data = melted.simulations.mat, aes(x=state, y=value), notch=TRUE,
size = 2, fill = c('#B9B4C7', '#5C5470', '#352F44'))+
geom_point(aes(x = 1, theory.vector[1]), shape = 8, color = "#E3A5C7", stroke = 8)+
geom_point(aes(x = 2, theory.vector[2]), shape = 8, color = "#B692C2", stroke = 8)+
geom_point(aes(x = 3, theory.vector[3]), shape = 8, color = "#694F8E", stroke = 8)+
scale_fill_brewer(palette="Blues") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+
labs(x = "State", y="Cost", title = TeX("R = 1, $\\alpha$ = 0.3, $C(i) = 2i$"), tag = "A")
And we see that simulating the model (box plots) gives us results consistent with the theory (shown with flowers).
Problem is taken from Sheldon Ross’s book on Stochastic Dynamic Programming. The solution to the problem is taken from:
Wang, R. C. (1976). Computing optimal quality control policies—two actions. Journal of Applied Probability, 13(4), 826-832.
Let’s consider another scenario what if neither the products get produced at regular time points nor the transition to a bad machine happens at regular time steps. Instead the timepoints are taken from an exponential distribution. To model this, we use a Continuous Time Hidden Markov (CTHMM) model.
The model is similar to two coupled stochastic process which evolve through two phases
The first and second phases evolve based on the following generator matrices respectively:
\(G_1=\begin{bmatrix} -\mu_1 & \mu_1 \\ \mu_2 & -\mu_2 \end{bmatrix}\)
\(G_2=\begin{bmatrix} -\nu_1 & \nu_1 \\ \nu_2 & -\nu_2 \end{bmatrix}\)
and the switch of the states happens through the following generator matrix:
\(\Lambda=\begin{bmatrix} -\lambda & \lambda \\ 0 & 0 \end{bmatrix}\)
#' Simulating the continuous time variation of the problem without intervention
#'
#' @param N
#' @param mu_1 Rate of generating a defective item when in a good state
#' @param mu_2 Rate of generating a non-defective item when in a good state
#' @param v_1 Rate of generating a defective item when in a bad state
#' @param v_2 Rate of generating a non-defective item when in a bad state
#' @param lambda Rate of switching from a good state to a bad state
#'
#' @return observations, time of arrival of events, switching time of the stochastic processes
#' @export
simulate.machine.CTHMM.variation <- function(N, mu_1, mu_2, v_1, v_2, lambda){
state <- 0 #Start at a good state
observations <- c()
timepoints <- c(0)
for(n in 1:N){
if(state ==0){ #If the machine is at a good state
switch.tau <- rexp(1, rate=lambda)
defective.tau <- rexp(1, rate=mu_1)
nondefective.tau <- rexp(1, rate=mu_2)
#Compare the time of arrival of the products (whether defective or non-defective)
if(defective.tau<nondefective.tau){ #If the defective product arrives first
timepoints <- c(timepoints, timepoints[n-1] + defective.tau)
observations <- c(observations, "D")
}else{ #If the non-defective product arrives first
timepoints <- c(timepoints, timepoints[n-1] + nondefective.tau)
observations <- c(observations, "N")
}
#Check if a change in phase happens
if(switch.tau < min(defective.tau, nondefective.tau)){
state <- 1
switch.time <- timepoints[n-1] + switch.tau
}
}else{ #If the machine is at a bad state
defective.tau <- rexp(1, rate=v_1)
nondefective.tau <- rexp(1, rate=v_2)
if(defective.tau<nondefective.tau){ #If the defective product arrives first
timepoints <- c(timepoints, timepoints[n-1] + defective.tau)
observations <- c(observations, "D")
}else{ #If the non-defective product arrives first
timepoints <- c(timepoints, timepoints[n-1] + nondefective.tau)
observations <- c(observations, "N")
}
}
}
return(list(observations, timepoints, switch.time))
}
Simulate the CT variation without intervention
N <- 2000
mu_1 <- 0.1
mu_2 <- 1
v_1 <- 0.8
v_2 <- 0.1
lambda <- 0.001
results <- simulate.machine.CTHMM.variation(N, mu_1, mu_2, v_1, v_2, lambda)
observations <- results[[1]]
time <- results[[2]]
switch.time <- results[[3]]
ct.hmm.qc.df <- data.frame(time=time, observations=observations)
ggplot()+
geom_rect(aes(xmin=0, xmax=switch.time, ymin=0, ymax=Inf), fill="#F1D3CE")+
geom_rect(aes(xmin=switch.time, xmax=tail(ct.hmm.qc.df$time,1), ymin=0, ymax=Inf), fill="#D1E9F6")+
geom_line(data = ct.hmm.qc.df, aes(x=time, y=cumsum(observations=="N")), color="#640D5F", size=1.5)+
geom_vline(xintercept=switch.time)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+
labs(title="CT variation simulation", y="Cumulative non-defective products")
Let us consider a variation of the above problem. Assume that if the machine is in a good state, it does not produce a defective product and it transitions to a bad state with probability \(\gamma\) and once in that state it only produces defective products and does not transition back to a good state. You incur a cost of \(C\) for producing defective products. You have the option to inspect the machine at each stage with cost \(I\) and upon inspection, if you find the machine in a bad state you replace the machine with cost \(R\). The question is when to inspect the machine to minimize the total incurred cost.
#' function to calculate the left hand side of the $t$ equation defined above
#'
#' @param t the smallest integer for which the above equation holds
#' @param alpha the discounting factor
#' @param gamma the probability of the machine transitioning from a good state
#' to a bad state
#'
#' @return the left hand side of the equation
#' @export
LHS_t_alpha_equation <- function(t, alpha, gamma){
outer.sum <- 0
for(j in 0:t){
inner.sum <- 0
for(k in 0:j){
inner.sum <- inner.sum + alpha^k
}
outer.sum <- outer.sum + inner.sum * (1-gamma)^j
}
return(outer.sum)
}
#' function to calculate the right hand side of the $t$ equation defined above
#'
#' @param R the cost of replacing the machine
#' @param C the cost of producing defective product
#' @param gamma the probability of transitioning from a good machine to a bad machine
#' @param alpha the discount factor
#' @param I the cost of inspecting the product
#'
#' @return the RHS of the equation
#' @export
RHS_alpha_t_equation <- function(R, C, gamma, alpha, I){
return(I/(gamma*(alpha*(1-gamma)*(C+R) - R)))
}
#' Find critical integer
#'
#' @param R the cost of resetting the machine
#' @param C the cost of producing a defective product
#' @param gamma the probability of transitioning from a good machine to a bad machine
#' @param alpha the discount factor
#' @param I the cost of inspecting the product
#'
#' @return the critical t
#' @export
find.t.alpha <- function(R, C, gamma, alpha, I){
hold.LHS <- c()
for(t in 0:500){
hold.LHS <- c(hold.LHS, LHS_t_alpha_equation(t, alpha, gamma))
}
return(min(which(hold.LHS >= RHS_alpha_t_equation(R, C, gamma, alpha, I))))
}
#Find the critical posterior
#' Find the critical posterior
#'
#' @param R the cost of resetting the machine
#' @param C the cost of producing a defective product
#' @param gamma the probability of transitioning from a good machine to a bad machine
#' @param alpha the discount factor
#' @param I the cost of inspecting the product
#'
#' @return the critical posterior probability of the machine being in a bad state
#' @export
find.critical.posterior <- function(R, C, gamma, alpha, I){
hold.sum.numerator <- 0
hold.sum.denominator <- 0
critical.t <- find.t.alpha(R, C, gamma, alpha, I) - 1
if(critical.t == Inf){
return(1)
}
for(j in 0:critical.t){
hold.sum.numerator <- hold.sum.numerator + alpha^j*(1-(1-gamma)^j)
hold.sum.denominator <- hold.sum.denominator + alpha^j
}
return((hold.sum.numerator+ I/(alpha*(1-gamma)*(C+R) - R))/hold.sum.denominator)
}
#' Incurred cost
#'
#' @param p the posterior probability of the machine being in the bad state
#' @param alpha the discount factor
#' @param C the cost of producing a defective product
#' @param R the cost of resetting the machine
#'
#' @return the total incurred cost
#' @export
L <- function(p, alpha, C, R){
return(p*((alpha)*(1-gamma)*(C+R)-R) + alpha*gamma*(C+R))
}
#' The optimal incurred cost function
#'
#' @param N the number of stages
#' @param gamma the probability of transitioning into the bad state from the good state
#' @param alpha the discount factor
#' @param p the posterior probability of the machine being in a bad state
#' @param C the cost of producing a defective item
#' @param R the cost of replacing the machine
#' @param I the inspection cost
#'
#' @return the expected cost
#' @export
V <- function(N, gamma, alpha, p, C, R, I){
if(N==0){
return(0)
}
updated.p <- 1 - (1-p)*(1-gamma)
return(min(p*C + alpha*V(N-1, gamma, alpha, updated.p, C, R, I), I + p*(C+R) +alpha*V(N-1, gamma, alpha, 0, C, R, I)))
}
#' The simulated incurred cost found through simulation
#'
#' @param N the number of stages
#' @param gamma the probability of transitioning into the bad state from the good state
#' @param alpha the discount factor
#' @param p the posterior probability of the machine being in a bad state
#' @param C the cost of producing a defective item
#' @param R the cost of replacing the machine
#' @param I the inspection cost
#'
#' @return the simulated cost
#' @export
simulate.problem <- function(N, gamma, alpha, p, C, R, I){
state <- 0
posterior.bad <- p
sum.cost <- 0
critical.posterior <- find.critical.posterior(R, C, gamma, alpha, I) #find the critical posterior probability of inspection
for(n in 1:N){
if(state == 1){ #if machine is in a bad state, incur costs related to the production of defective products
sum.cost <- sum.cost + C*alpha^(n-1)
}
posterior.bad <- 1 - (1-posterior.bad)*(1-gamma) #update posterior
if(posterior.bad >= critical.posterior){
sum.cost <- sum.cost + I*alpha^(n-1) #incur inspection costs
if(state == 1){ #costs related to replacement, if machine is found to be in a bad state
sum.cost <- sum.cost + R*alpha^(n-1)
state <- 0
}
posterior.bad <- 0
}
if(state ==0){ #simulate transition to the bad state from good state
state <- rbinom(1, 1, gamma)
}
}
return(sum.cost)
}
Suppose we have two coins, a green one and a red one. We know that one of the coins comes heads with probability \(p_1\) and the other comes heads with probability \(p_2\) (\(p_1 > p_2\)). We don’t know which is which but we know that the red coin is the better coin (\(p_1\)) with probability \(p\). We are flipping coins over a finite horizon and we try to maximize the expected reward. For each heads that we get we get a reward of 1 with a discount factor of \(\beta\).
Let’s first try to track the posterior probability that the red coin is in fact the better coin. For that, we try either flipping the red coin or the green coin.
Obviously, if we flip the red coin and get heads then the posterior probability of the red coin being the better coin would get updated to:
\(\frac{p(p_1)}{p(p_1)+(1-p)p_2}\)
On the other hand if we flip the red coin and get tails then the posterior probability of the red coin being the better coin would get updated to:
\(\frac{p(1-p_1)}{p(1-p_1)+(1-p)(1-p_2)}\)
#' Function for updating the posterior probability of the red coin being the more successful coin
#' when flipping the red coin. The assumption is $p_1>p_2$
#'
#' @param p the posterior probability of the red coin being the better coin
#' @param p_1 the probability of success for the more successful coin
#' @param p_2 the probability of success for the less successful coin
#' @param flip.result the result of flipping the coin; can be either "H" or "T"
#'
#' @return The updated posterior probability of the red coin being the more successful coin
#' @export
prior.update.red <- function(p, p_1, p_2, flip.result){
if(flip.result == "H"){
p.updated <- p*p_1/(p*p_1 + (1-p)*p_2)
}else if(flip.result == "T"){
p.updated <- p*(1-p_1)/(p*(1-p_1)+(1-p)*(1-p_2))
}else{
return("Error! Please include only 'H' or 'T' as function argument for 'flip.result'")
}
return(p.updated)
}
#If we flip the green coin then it would be easy to calculate the posterior probability of the
#green coin being the worse one (it is essentially the same as saying the red coin is the better one).
#If we flip the green coin and we get heads then the posterior probability would get updated to:
#$\frac{p(p_2)}{p(p_2)+(1-p)p_1}
#If we flip the green coin and get tails then the posterior probability would get updated to:
#$\frac{p(1-p_2)}{p(1-p_2)+(1-p)(1-p_1)}$
#' Function for updating the posterior probability of the red coin being the more successful coin
#' when flipping the green coin. The assumption is $p_1>p_2$
#'
#' @param p the posterior probability of the red coin being the better coin
#' @param p_1 the probability of success for the more successful coin
#' @param p_2 the probability of success for the less successful coin
#' @param flip.result the result of flipping the coin; can be either "H" or "T"
#'
#' @return The updated posterior probability of the red coin being the more successful coin
#' @export
prior.update.green <- function(p, p_1, p_2, flip.result){
if(flip.result == "H"){
p.updated <- p*p_2/(p*p_2+(1-p)*p_1)
}else if(flip.result == "T"){
p.updated <- p*(1-p_2)/(p*(1-p_2)+(1-p)*(1-p_1))
}else{
return("Error! Please include only 'H' or 'T' as function argument for 'flip.result'")
}
return(p.updated)
}
#' Simulate the green and red coin problem
#'
#' @param N the number of stages
#' @param p the posterior probability of the red coin being the one with probability $p_1$ of being
#' successful
#' @param p_1 the probability of success for coin 1
#' @param p_2 the probability of success for coin 2
#' @param beta the discount factor
#'
#' @return the list of rewards for each stage; the coin we choose at each stage; the posterior probability of the red coin being
#' the more successful one; the actual coin with a higher probabilty - if 1 it is the red coin, if 0 it is the green coin
#' @export
simulate.problem <- function(N, p, p_1, p_2, beta){
red.prior <- p
#green.prior <- 1-p
rbinom(1, 1, p)->coin.assignment
n <- 1
rewards <- c()
chosen.coin <- c()
red.posterior <- c()
while(n < N+1){
if(coin.assignment == 0){
p_g <- p_1
p_r <- p_2
}else{
p_r <- p_1
p_g <- p_2
}
if(red.prior >= 1/2){
chosen.coin <- c(chosen.coin, "red")
rbinom(1, 1, p_r)->flipped.coin
if(flipped.coin == 0){
red.prior <- prior.update.red(red.prior, p_1, p_2, "T")
rewards <- c(rewards, 0)
red.posterior <- c(red.posterior, red.prior)
}else{
red.prior <- prior.update.red(red.prior, p_1, p_2, "H")
rewards <- c(rewards, 1*beta^(n-1))
red.posterior <- c(red.posterior, red.prior)
}
}else{
chosen.coin <- c(chosen.coin, "green")
rbinom(1, 1, p_g)->flipped.coin
if(flipped.coin == 0){
red.prior <- prior.update.green(red.prior, p_1, p_2, "T")
rewards <- c(rewards, 0)
red.posterior <- c(red.posterior, red.prior)
}else{
red.prior <- prior.update.green(red.prior, p_1, p_2, "H")
rewards <- c(rewards, 1*beta^(n-1))
red.posterior <- c(red.posterior, red.prior)
}
}
n <- n+1
}
return(list(rewards, chosen.coin, red.posterior, coin.assignment))
}
Let’s think about the theoretical function. We can let it run for a long time and set the base case to when the posterior probability reaches either 0 or 1 based on the convergence we saw in the simulations. But it will take a very long time. Consequently, I will run it for a set number of stages for demonstration purposes.
#' Bellman optimality equation function
#'
#' @param p the posterior probability of the red coin being the better coin
#' @param p_1 the probability of success for the more successful coin
#' @param p_2 the probability of success for the less successful coin
#' @param beta the discount factor
#' @param stages the number of stages
#'
#' @return the expected reward
#' @export
V <- function(p, p_1, p_2, beta, stages) {
#The base case
if (stages == 0) {
return(0)
}
#We have two actions, either we:
#flip the red coin
flip.red.action <- p * (p_1 + beta * V(p * p_1 / (p * p_1 + (1 - p) * p_2),p_1, p_2, beta, stages -1)) +
(1 - p) * (p_2 + beta * V(p * (1 - p_1) / (p * (1 - p_1) + (1 - p) * (1 - p_2)),p_1, p_2, beta, stages -1))
#or we flip the green coin
flip.green.action <- (1 - p) * (p_1 + beta * V((1 - p) * p_1 / ((1 - p) * p_1 + p * p_2),p_1, p_2, beta, stages -1)) +
p * (p_2 + beta * V((1 - p) * (1 - p_1) / ((1 - p) * (1 - p_1) + p * (1 - p_2)),p_1, p_2, beta, stages -1))
return(max(flip.red.action, flip.green.action))
}
# Parameters
p <- 0.9 # Initial belief that red coin is better
p_1 <- 0.9 # Probability of heads for the better coin
p_2 <- 0.3 # Probability of heads for the worse coin
beta <- 0.95 # Discount factor
results.sim.vector <- c()
results.theoretical.vector <- c()
for(beta in seq(0, 0.95, 0.1)){
sims <- c()
for(k in 1:1000){
sims <- c(sims, sum(simulate.problem(10, p, p_1, p_2, beta)[[1]]))
}
results.sim.vector <- c(results.sim.vector, mean(sims))
results.theoretical.vector <- c(results.theoretical.vector, V(p, p_1, p_2, beta, 10))
}
result.df <- data.frame(beta = seq(0, 0.95, 0.1), theory = results.theoretical.vector, simulations = results.sim.vector)
#Check simulations vs theory
ggplot(result.df)+
geom_line(aes(x=beta, y=theory), color="#AC87C5", size=1.5)+
geom_point(aes(x=beta, y=simulations), color="#E0AED0", stroke=2.5, shape=8)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(x=TeX("$\\beta"), y="Reward", title="Red and green coin problem; 10 stages")
Consider the following inventory problem. At the beginning of each day, the amount of goods on hand is noted and a decision is made as to how much to order. The cost for ordering \(j\) additional units is \(C(j)\), where
\[ C(j) = \begin{cases} K + cj & \text{if } j > 0, \\ 0 & \text{if } j = 0. \end{cases} \]
The order is assumed to be immediately filled. After the order has been filled, the daily demand for the product occurs. The demand will be \(j\) with probability \(P_j, j \geq 0\). If the demand exceeds the present supply, then a penalty cost of \(A\) per unit of unmet demand is incurred. It is also assumed that, if the demand exceeds the supply, then the additional demand is backlogged and is filled when additional inventory becomes available (this can be represented as negative inventory). In addition, there is an inventory holding cost of \(h\) for each item of remaining inventory at the end of a period.
The objective is to minimize the total expected discounted cost over an infinite time horizon when \(\alpha\) is the discount factor.
Set this up as a Markov decision process and write the optimality equation.
Consider now a single-period version of the preceding problem. Let \(L(j) = A \sum_{k=j}^{\infty} (k-j)P_k + h \sum_{k=0}^{j} (j-k)P_k\) denote the expected penalty and holding costs if we order to bring inventory up to \(j\).
Show that \(L(j)\) is convex. That is, \(L(j+1) - L(j)\) is non-decreasing in \(j\). Show that the optimal policy when the initial inventory is \(i\) is to order \(S - i\) if \(i < s\) and 0 if \(i \geq s\) where \(S\) is the value that minimizes \(cj + L(j)\) and \(s\) is such that \(cs + L(s) = K + cs + L(S)\).
We take the demand from a Poisson distribution. To simulate the problem, let us first define some functions.
#' The expected incurred cost function for a single stage
#'
#' @param j the inventory value
#' @param A the cost of not meeting demands
#' @param h the cost of holding excess inventory
#' @param lambda the Poisson distribution parameter for the demand
#'
#' @return the expected cost for a single stage
#' @export
L <- function(j, A, h, lambda){
#Two scenarios either the demand exceeds inventory or the inventory exceeds the demand
#If demand is greater than supply then we go over all possible values for the demand with the probability of occurrence and calculate the expected cost of not meeting demands.
k <- j
sum.demand.g.supply <- 0
while(dpois(k, lambda)>10e-5){
sum.demand.g.supply <- sum.demand.g.supply + (k-j)*dpois(k, lambda)
k <- k+1
}
sum.demand.g.supply <- sum.demand.g.supply*A
#If supply is greater than demand then we go over all possible values for the demand with the probability of occurrence (up to our current inventory) and calculate the expected cost of holding excess inventory.
sum.demand.l.supply <- 0
for(k in 0:j){
sum.demand.l.supply <- sum.demand.l.supply + (j-k)*dpois(k, lambda)
}
sum.demand.l.supply <- sum.demand.l.supply*h
#Return the expected cost
return(sum.demand.g.supply + sum.demand.l.supply)
}
j_seq <- 2:20
L_j <- c()
L_actual <- c()
for(j in j_seq){
L_j <- c(L_j, L(j+1, 1, 2, 5) - L(j, 1, 2, 5))
L_actual <- c(L_actual, L(j, 1, 2, 5))
}
L.df <- data.frame(j = j_seq, L = L_j)
ggplot(L.df)+
geom_line(aes(x=j_seq, y=L_j, colour = "Difference"), size=1.5)+
geom_line(aes(x=j_seq, y=L_actual, colour = "L(j)"), size=1.5)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(x="j", y="Value", title="Inventory problem")+
scale_color_manual(values = c("Difference" = "#7E60BF", "L(j)" = "#433878"))+
labs(colour="", tag="A")
#' Finds the critical S value (for how much the inventory value should be) that minimizes the cost when K=0
#'
#' @param A the cost of not meeting demands
#' @param h the cost of exceeding demands
#' @param lambda the parameter for the Poisson distribution from which the demand is taken from
#' @param c the cost of ordering additional products
#' @param search.interval the search interval for how much we should increase inventory, should be an interval of integer numbers such as 1:300
#'
#' @return the critical S value which minimizes the incurred cost for when K=0 (no baseline cost for ordering an item)
#' @export
find.S <- function(A, h, lambda, c, search.interval){
#We do a brute force search over all interval
brute.force.vector <- sapply(search.interval, function(j) {
L(j, A, h, lambda) + c * j
})
S <- search.interval[which(brute.force.vector == min(brute.force.vector))]
return(S)
}
#' Finds the critical s value (for how much the inventory value should be) that minimizes the cost
#'
#' @param A the cost of not meeting demands
#' @param h the cost of exceeding demands
#' @param lambda the parameter for the Poisson distribution from which the demand is taken from
#' @param c the cost of ordering additional products
#' @param S the critical S that minimizes the incurred cost for when K=0
#' @param K the necessary cost for even ordering (you have to pay $K if you decide to order in addition to the cost for buying the items)
#' @param search.interval the search interval for how much we should increase inventory, should be an interval of integer numbers such as 1:300
#' @return the critical s value which minimizes the incurred cost
#' @export
find.s <- function(A, h, lambda, c, S, K, search.interval){
#Check the target value to which we compare the incurred cost with the critical s value
target_value <- c * S + L(S, A, h, lambda) + K
#We do a brute force search over all interval
brute.force.vector <- sapply(search.interval, function(s) {
L(s, A, h, lambda) + c * s
})
#Find the s that minimizes the Euclidean distance
distances <- sqrt((brute.force.vector - target_value)^2)
min_index <- which.min(distances)
s <- search.interval[min_index]
return(s)
}
We are assuming that we know the \(\lambda\) parameter for the Poisson distribution from which the demand comes from.
#' Return the cost incurred
#'
#' @param initial.stock the initial inventory value we start with
#' @param A the cost of not meeting demands
#' @param h the cost of
#' @param lambda
#' @param stages the number of stages
#' @param alpha the discount factor
#' @param K the baseline and necessary cost for ordering
#' @param c the cost of ordering additional products
#' @param search.interval the search interval for how much we should increase inventory, should be an interval of integer numbers such as 1:300
#'
#' @return the incurred cost over all stages
#' @export
simulate.problem <- function(initial.stock, A, h, lambda, stages, alpha, K, c, search.interval){
stock <- initial.stock #Set initial inventory value
cost.incurred <- 0 #Initialize incurred cost
#Find out how much we should order
critical.S <- find.S(A, h, lambda, c, search.interval)
critical.s <- find.s(A, h, lambda, c, critical.S, K, search.interval)
for(n in 1:stages){ #Go over all stages
#If we have more inventory than how much we should have to satisfy demands, we don't order and incur no costs.
if(critical.s<=stock){
u <- 0
ordering.cost <- 0
}else{ #Otherwise we order and pay some ordering costs.
u <- critical.s - stock
ordering.cost <- u*c + K
}
w <- rpois(1, lambda) #Demand comes from a Poisson distribution
if(w > (u+stock)){ #If the demand exceeds updated inventory value then we incur some costs
cost.incurred + (w - (u+stock))*A*(alpha)^(n-1) -> cost.incurred
stock <- w - (u + stock)
}
if(w < (u+stock)){ #If the updated inventory value exceeds the demand then we incur costs related to holding excess inventory
cost.incurred + ((u+stock) - w)*h*(alpha)^(n-1) -> cost.incurred
stock <- u+stock - w
}
ordering.cost*alpha^(n-1) + cost.incurred ->cost.incurred
}
return(cost.incurred)
}
#' The expected incurred cost for the inventory problem
#'
#' @param i the initial inventory value we start with
#' @param A the cost of not meeting demands
#' @param h the cost of
#' @param lambda
#' @param stages the number of stages
#' @param alpha the discount factor
#' @param K the baseline and necessary cost for ordering
#' @param c the cost of ordering additional products
#' @param search.region the search region for how much we should increase inventory, since we start with 0 should be a single number
#'
#' @return the expected incurred cost for running the inventory problem
#' @export
V <- function(i, A, h, lambda, n, alpha, K, c, search.region){
#Base case is when we exhaust all stages then there is no demand
if(n==0){
return(0)
}
#Find out how much we should increase our inventory value (j is the optimal inventory value we want to have)
sum.hold.j <- c()
for(j in i:(i+search.region)){
sum.hold.j <- c(sum.hold.j, L(j, A, h, lambda))
}
i:(i+search.region) -> search.interval
search.interval[which(sum.hold.j == min(sum.hold.j))]->j
ordering.cost <- c*(j-i)+K
#If it is optimal to not order anything then we don't incur any ordering costs and the inventory loses the expected amount of demand
if(j == i){
i - lambda -> j
ordering.cost <- 0
}
#At each step we sum up the expected cost of the previous stage and the cost incurred in the current stage
return(V(j, A, h, lambda, n-1, alpha, K, c, search.region)*alpha + min(sum.hold.j) + ordering.cost)
}
hold.store.sim <- numeric()
hold.store.theory <- numeric()
inventory_values <- seq(0, 100, 10)
hold.store.sim <- sapply(inventory_values, function(i) {
hold.store <- replicate(500, simulate.problem(i, 5, 1, 5, 10, 0.7, 1, 0.3, 1:300))
mean(hold.store)
})
hold.store.theory <- sapply(inventory_values, function(i) {
V(i, 5, 1, 5, 10, 0.7, 1, 0.3, 300)
})
result.df <- data.frame(inventory = inventory_values, simulations = hold.store.sim, theory = hold.store.theory)
ggplot(result.df)+
geom_line(aes(x=inventory, y=theory), color = "#7E60BF", size=2)+
geom_point(aes(x=inventory, y=simulations), color = "#433878", shape=8, stroke =2.5)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(x="Initial inventory", y="Cost", title="Inventory problem; 10 stages")
Below we consider a simple variation of Merton’s portfolio problem in discrete time and find the optimal solution solely through programming.
Suppose we start with a wealth of \(S_0\) and at each stage, \(n\), we have the choice to consume \(C_n\) units, invest \(I_n\) in a risk-free asset with a guaranteed return of \(r\), and \(J_n\)units in a risky asset with a random return taken from a log normal distribution \(Z_n\). Our goal is to find out what fraction of our wealth we should allocate to investing in different assets vs consuming to maximize our utility obtained through consumption \((u(c^\beta))\).
Let’s start by defining the optimality function for this problem. One could imagine that if we were to run this problem for a finite horizon for the sake of simplicity, at stage \(N\), we no longer need to invest any money and our allocation for \(I_{N}\) and \(J_{N}\) would be 0. Consequently, all of our money would get allocated to \(C_{N}\). Now let us move one step backward. At this stage we have to sum up \(C_{N-1}\) and \(\alpha(E[V(S_{N})])\). We know that \(S_{N}\) is equal to the expected value of \(rI_{N}+ZJ_{N}\). That means the optimality equation is equal to \(V(S_n) = max_{I_n, J_n, C_n}\[u(S_{n}\ + \alpha V(S_{n+1})]\). To account for \(Z_n\), we integrate over the distribution of \(Z\).
In the example below, we consider the risky asset to have its return taken from a log normal distribution and use a programming approach to find optimal fractions.
#' The utility function
#'
#' @param c units of consumption
#' @param beta the beta parameter for calculating utility
#'
#' @return the utility
#' @export
u <- function(c, beta){
return(c^beta)
}
#' Bellman equation function
#'
#' @param S the wealth
#' @param N the number of stages
#' @param lambda.c units consumed
#' @param lambda.i units invested in riskless asset
#' @param beta the function parameter for utility function
#' @param r rate of growth for riskless investment
#' @param mean.z the mean of the log normal distribution related to the risky asset
#' @param sd.z standard deviation of the log normal distribution to the risky asset
#' @param alpha the discount factor
#'
#' @return the expected reward
#' @export
V <- function(S, N, lambda.c, lambda.i, beta, r, mean.z = 0, sd.z = 1, alpha=0.95, integration.limit=15){
return(V_helper(1, S, N, lambda.c, lambda.i, beta, r, mean.z, sd.z, alpha, integration.limit))
}
#' Helper function for the optimality equation
#'
#' @param n the current stage
#' @param S the wealth
#' @param N the number of stages
#' @param lambda.c units consumed
#' @param lambda.i units invested in riskless asset
#' @param beta the function parameter for utility function
#' @param r rate of growth for riskless investment
#' @param mean.z the mean of the log normal distribution related to the risky asset
#' @param sd.z standard deviation of the log normal distribution to the risky asset
#' @param alpha the discount factor
#'
#' @return the expected reward
#' @export
V_helper <- function(n, S, N, lambda.c, lambda.i, beta, r, mean.z, sd.z, alpha, integration.limit = 15){
if(lambda.c + lambda.i > 1){ # Return Inf if the fractions exceed 1
return(Inf)
}
#boundary condition
if(n == N){
return(u(S, beta))
}
lambda.j <- 1 - (lambda.c + lambda.i)
#integrate over Z distribution
integrand <- function(z){
S_next <- (1 + r) * lambda.i * S + z * lambda.j * S
V_next <- V_helper(n + 1, S_next, N, lambda.c, lambda.i, beta, r, mean.z, sd.z, alpha)
return(V_next * dlnorm(z, meanlog = mean.z, sdlog = sd.z))
}
expected_value <- integrate(integrand, lower = 0, upper = integration.limit)$value
return(u(lambda.c * S, beta) + alpha * expected_value)
}
#' Objective function to minimize
#'
#' @param par parameter set consisting of lambda_c and lambda_i
#' @param S0 the initial wealth
#' @param N the number of stages
#' @param beta the function parameter for utility function
#' @param r rate of growth for riskless investment
#' @param mean.z the mean of the log normal distribution related to the risky asset
#' @param sd.z standard deviation of the log normal distribution to the risky asset
#' @param alpha the discount factor
#'
#' @return -1 times the value of the Bellman equation
#' @export
objective.function <- function(par, S0, N, beta, r, mean.z = 0, sd.z = 1, alpha = 0.95) {
lambda.c <- par[1]
lambda.i <- par[2]
lambda.j <- 1 - lambda.c - lambda.i
if (lambda.j < 0 || lambda.c < 0 || lambda.i < 0) {
return(Inf)
}
value <- V(S0, N, lambda.c, lambda.i, beta, r, mean.z, sd.z, alpha)
#we try to minimize -V
return(-value)
}
library(stats)
#Define parameters
initial_guess <- c(0.3, 0.3)
S0 <- 10000
N <- 5
beta <- 0.09
r <- 0.3
mean.z <- 0
sd.z <- 1
alpha <- 0.95
#Find optimal fractions
result <- constrOptim(
theta = initial_guess,
f = objective.function,
grad = NULL,
ui = rbind( #define constraints
c(1, 0), #lambda.c >= 0
c(0, 1), #lambda.i >= 0
c(-1, -1) #lambda.c + lambda.i <= 1
),
ci = c(0, 0, -1),
S0 = S0,
N = N,
beta = beta,
r = r,
mean.z = mean.z,
sd.z = sd.z,
alpha = alpha
)
optimal.lambda.c <- result$par[1]
optimal.lambda.i <- result$par[2]
optimal.lambda.j <- 1 - optimal.lambda.c - optimal.lambda.i
optimal.V <- -result$value
#' Simulate the problem
#'
#' @param S0 the initial wealth
#' @param N the number of stages
#' @param lambda.c units consumed
#' @param lambda.i units invested in riskless asset
#' @param beta the function parameter for utility function
#' @param r rate of growth for riskless investment
#' @param mean.z the mean of the log normal distribution related to the risky asset
#' @param sd.z standard deviation of the log normal distribution to the risky asset
#' @param alpha the discount factor
#'
#' @return the expected utility from simulations
#' @export
simulate.problem.different.proportions <- function(S0, N, lambda.c, lambda.i, beta, r, mean.z = 0, sd.z = 1, alpha=0.95){
S <- S0
sum.utility <- 0
lambda.j <- 1 - (lambda.c+lambda.i)
for(n in 1:(N-1)){
S*c(lambda.c, lambda.i, lambda.j) -> updated.allocations
Z <- rlnorm(1, meanlog = mean.z, sdlog = sd.z) #Take Z from a log normal distribution
sum(updated.allocations[2:3]*c(r+1, Z))->S
sum.utility<-sum.utility + u(updated.allocations[1], beta)*alpha^(n-1)
}
sum.utility <- sum.utility + u(S, beta)*alpha^(N-1)
return(sum.utility)
}
result.df <- data.frame(lambda.i = numeric(), lambda.c = numeric(),
hold.theory = numeric(), hold.sims = numeric())
for(lambda.i in c(optimal.lambda.i-0.3, optimal.lambda.i-0.2, optimal.lambda.i)){
for(lambda.c in c(optimal.lambda.c-0.2, optimal.lambda.c-0.15, optimal.lambda.c-0.1, optimal.lambda.c-0.05, optimal.lambda.c, optimal.lambda.c+0.1)){
#theoretical value
hold.theory <- V(S0, N, lambda.c, lambda.i, beta, r)
#simulations
simulations <- replicate(1000, simulate.problem.different.proportions(S0, N, lambda.c, lambda.i, beta, r))
hold.sims <- mean(simulations)
result.df <- rbind(result.df, data.frame(lambda.i = lambda.i, lambda.c = lambda.c,
hold.theory = hold.theory, hold.sims = hold.sims))
}
}
ggplot(result.df, aes(x = lambda.c, color = factor(lambda.i))) +
geom_point(aes(y = hold.sims), stroke = 2, shape=8) +
geom_line(aes(y = hold.theory, group = lambda.i), size = 1) +
labs(title = "Portfolio problem rewards",
x = expression(lambda[c] ~ "(Consumption Fraction)"),
y = "Value",
color = expression(lambda[i] ~ "(Safe Investment Fraction)")) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(tag="A")+
ylim(c(8, 11))+geom_hline(yintercept=optimal.V)+
geom_vline(xintercept=optimal.lambda.c)+
scale_color_manual(values = c("0.249538121895296" = "#D96098", "0.349538121895296" = "#BEAEE2", "0.549538121895296" = "navy"))
Assume a stopping problem where a decision maker can either continue with no cost \(C(i)=0\) or stop and receive a reward equivalent to \(R(i)=i\). At each stage that the decision maker decides to continue, there is a 50% chance that his wealth will change to \(i+1\) and a 50% chance that his wealth changes to \(i-1\). If we were to define the optimality equation in such a way that the optimal policy minimizes the cost then we should set the boundary condition to \(V_0(i) = -R(i)\). However, we know that negative dynamic programming has two requirements that need to be met:
\(\inf_i C(i) > 0\),
\(\sup_i R(i) < \infty\)
Neither one of the conditions are met in this problem. Below, we define the optimality equation and simulate the problem. We see that the optimality equation consistent with simulations return negative the initial wealth value.
R <- function(i) return(i)
#' Optimality equation
#'
#' @param n the stage
#' @param i the wealth
#'
#' @return the expected cost
#' @export
V <- function(n, i){
if(n==0) return (-R(i))
return(1/2*V(n-1, i-1)+1/2*V(n-1, i+1))
}
#' Simulation function for the non-stable stopping problem
#'
#' @param n the stage
#' @param i the wealth
#'
#' @return the simulated cost
#' @export
simulate.non.stable.problem <- function(n, i){
forward.prob <- rbinom(1, 1, 0.5)
for(j in 1:n){
if(forward.prob==1){
i <- i + 1
}else{
i <- i - 1
}
}
return(-i)
}
results.df <- data.frame(i = integer(), Simulation = numeric(), Theory = numeric())
i.seq <- -5:15
N <- 20
for(i in i.seq){
sims <- mean(replicate(5000, simulate.non.stable.problem(N, i)))
theory <- V(N, i)
results.df <- rbind(results.df, data.frame(i = i, Simulation = sims, Theory = theory))
}
ggplot(results.df)+geom_point(aes(x=i, y=Simulation), color = "#D96098", stroke = 2, shape = 8)+
geom_line(aes(x=i, y=Theory), color = "#D96098", size = 1.5)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(tag="A", y="Value", x="i", title="Non-stable stopping; N=20")
Suppose you have an initial wealth of \(i\) and you are given the option to play a game for as many times as you want with no cost. Each time you play the game, you have a 50% chance of winning and tripling your wealth or losing everything you have. Mathematically if you were to play this game for only 1 stage and your initial wealth is 1000 dollars then your expected reward would be 1500 dollars. However, intuitively you would most likely avoid playing. Similar to the previous problem, neither of the conditions for negative dynamic programming are met for this problem. Below, we simulate this problem and show that consistent with the optimality equation the more we play the game the lower would be our cost. Then we show that the probability of losing all of our money from playing this game repeatedly increases as the number of stages increases.
#' Bellman equation for the triple or nothing problem
#'
#' @param n the number of stages
#' @param i the initial wealth
#'
#' @return expected cost
#' @export
V <- function(n, i){
if(n==0)return(-i)
return(1/2*V(n-1, 3*i)+1/2*V(n-1, 0))
}
#' Simulating the triple or nothing problem
#'
#' @param n the number of stages
#' @param i the initial wealth
#'
#' @return simulated cost
#' @export
triple.or.nothing.simulation <- function(n, i){
for(j in 1:n){
if(rbinom(1, 1, 0.5)==1){
i <- i*3
}else{
#return(0)
i<-0
}
}
return(-i)
}
results.df <- data.frame(i = integer(), Simulation = numeric(), Theory = numeric())
i <- 10
N.seq <- seq(1, 10, 1)
num.simulations <- 50000
for(N in N.seq){
sims <- mean(replicate(num.simulations, triple.or.nothing.simulation(N, i)))
theory <- V(N, i)
results.df <- rbind(results.df, data.frame(N = N, Simulation = sims, Theory = theory))
}
ggplot(results.df)+geom_point(aes(x=N, y=Simulation), color = "#D96098", stroke = 2, shape = 8)+
geom_line(aes(x=N, y=Theory), color = "#D96098", size = 1.5)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+labs(tag="A", y="Value", x="N", title="Triple or nothing; i=10")
Looking at this plot it is tempting to think we should play this game more and more to minimize our expected cost. But let’s try to find out the probability of losing all of our money from playing this game.
loss.sim <- c()
num.simulations <- 1000
for(stage in seq(1, 25)){
loss.sim <- c(loss.sim, sum(replicate(num.simulations, triple.or.nothing.simulation(stage, 5)) == 0)/num.simulations)
}
#' Probability of losing everything from playing the triple bet or nothing problem
#'
#' @param N the number of stages
#'
#' @return the theoretical probability of losing all of our money
#' @export
loss.theory <- function(N){
sum.hold <- 0
for(n in 1:N){
sum.hold <- sum.hold + 1/2^n
}
return(sum.hold)
}
unlist(lapply(seq(1, 25, 1), loss.theory))->loss.theory
loss.df <- data.frame(i=seq(1, 25, 1), sim=loss.sim, theory=loss.theory)
ggplot(loss.df)+geom_point(aes(x=i, y=sim), color = "#D96098", stroke = 2, shape = 8)+
geom_line(aes(x=i, y=theory), color = "#D96098", size = 1.5)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18))+labs(tag="B", title="Probability of total loss", x="N", y="Probability")
We can see that the sooner we stop playing this game, the better it is because our chances of losing all of our money is less.
Suppose that offers come in daily for an asset we own. Each offer, independent of others, will be \(j\) with probability \(P_j\), \(j \geq 0\). (An offer of zero can be interpreted to mean that no offer arrived that day.) Once the offer is made it must be accepted or rejected, and for each day the item remains unsold we incur a maintenance cost \(C\), \(C > 0\). What policy maximizes our expected return, where by return we mean the accepted value minus the total maintenance cost incurred?
With the state at any time being the offer made at that time, it is intuitive that the optimal policy should be of the following form: accept any offer whose value is at least \(i^*\). However, such a set of states is not closed (because offers can decline), thus this problem will not satisfy the conditions of Theorem 2.2. (Check this last statement directly.)
Let us change the model somewhat by supposing that we are allowed to recall any past offer. In this case, the state at any time will be the maximum offer received by that time. Hence, the transition probabilities \(P_{ij}\) are given by \[ P_{ij} = \begin{cases} 0 & \text{if } j < i, \\ \sum_{k=0}^{i} P_k & \text{if } j = i, \\ P_j & \text{if } j > i. \end{cases} \]
The stopping set for the one-stage look-ahead policy is \[ B = \left\{ i : i \geq i \sum_{j=0}^{i} P_j + \sum_{j=i+1}^{\infty} j P_j - C \right\} \] which simplifies to \[ B = \left\{ i : C \geq \sum_{j=i+1}^{\infty} (j - i) P_j \right\} = \left\{ i : C \geq E[(X - i)^+] \right\}, \] where \(X\) is a random variable representing the offer in a given day, that is, \(P(X = j) = P_j\), and \((X - i)^+ = \max(X - i, 0)\). Because \((X - i)^+\) decreases in \(i\), we see that in as much as the state cannot decrease (the maximum offer ever received cannot decrease in time), \(B\) is a closed set. Hence, assuming stability (which can be shown to hold when \(E(X^2) < \infty\)), we see that the optimal policy accepts the first offer that is at least \(i^*\), where \[ i^* = \min \left\{ i : C \geq \sum_{j=i+1}^{\infty} (j - i) P_j \right\}. \]
Also, because the optimal policy never recalls a past offer, it is also a legitimate policy for the original problem in which such recall is not allowed. Hence, it must be optimal for that problem also. (This follows because it is clear that the maximal return when no recall is allowed cannot be larger than when recall is allowed.)
#' Simulate the asset problem
#'
#' @param offers sequence of offers
#' @param lambda the Poisson parameter for offer arrivals
#' @param C the cost of holding onto asset
#'
#' @return a list that contains: 1. the index of the best offer 2. the reward found through the optimal strategy
#' @export
simulate.problem <- function(offers, lambda, C){
sum.hold <- 0
for(offer.idx in seq_along(offers)){
max(offers[1:offer.idx])->i
for(j in seq(i+1, qpois(0.99999999, lambda), 1)){
sum.hold <- (j-i)*dpois(j, lambda)
}
if(sum.hold <= C){
return(list(offer.idx, offers[offer.idx] - (offer.idx-1)*C))
}
}
return(list(length(offers), offers[length(offers)] - (length(offers)-1)*C))
}
Simulate the problem, the dashed line indicates how often the difference between the reward found through simulation and the best possible reward falls within one standard deviation from the Poisson distribution.
C <- 10
lambda <- 1000
n <- 1000
num.simulations <- 10000
sim.actual.diff <- c()
sim.reward <- c()
best.reward <- c()
for(sim in 1:num.simulations){
offers <- rpois(n, lambda)
sim.reward <- c(sim.reward, simulate.problem(offers, lambda, C)[[2]])
best.reward <- c(best.reward, max(offers - seq(0, length(offers) - 1)*C))
sim.actual.diff <- c(sim.actual.diff, max(offers - seq(0, length(offers) - 1)*C) - simulate.problem(offers, lambda, C)[[2]])
}
sim.rewards <- data.frame(sim.reward, best.reward, sim.actual.diff)
ggplot(sim.rewards) +
geom_density(aes(x=sim.actual.diff, fill="Difference"), color="black", alpha=0.8)+
geom_density(aes(x=sim.reward, fill="Optimal strategy"), color="black", alpha=0.8)+
geom_density(aes(x=best.reward, fill="Best reward"), color="black", alpha=0.8)+
geom_vline(xintercept=sqrt(lambda), linetype='dashed', size=1.5)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+labs(title = "Best vs optimal strategy reward", x="Reward", y="Density", tag="A", fill="Reward")+
scale_fill_manual(values=c("Difference" = "#B8001F", "Optimal strategy" = "#257180", "Best reward" = "#001F3F"))
Suppose that we are not aware what distribution the offers come from but we know that they may come from \(N\) distributions with probabilities \(p_1, p_2, p_3, ..., p_N\). In the example below, we assume that the distributions are exponential and we simulate the problem under the one-stage look ahead policy and compare it to the best possible reward we can gain from the offers.
Let us first see if we can figure out which distribution the observations are coming from using Bayesian inference.
The posterior probability would get updated through:
\(P(\lambda_i|X_1=x) = \frac{P(X_1 = x \mid \lambda_i) \cdot P(\lambda_i)}{P(X_1 = x) = \sum_{i=1}^{N} P(X_1 = x \mid \lambda_i) \cdot p_i}\)
#' Update the posteriors
#'
#' @param x the observation of offer
#' @param lambdas the lambdas for the exponential distributions from which the observations come from
#' @param priors the priors for each distribution
#'
#' @return the updated posterior distribution
#' @export
update.posterior <- function(x, lambdas, priors){
updated.posteriors <- c()
for(idx in 1:length(lambdas)){
lambda <- lambdas[idx]
prior <- priors[idx]
sum.hold <- 0
for(j in 1:length(lambdas)){
sum.hold <- sum.hold + dexp(x, lambdas[j])*priors[j]
}
updated.posteriors <- c(updated.posteriors, dexp(x, lambda)*prior/sum.hold)
}
return(updated.posteriors)
}
#Let's first confirm that updating the posterior distribution works.
Mu.vector <- c(6, 4, 8, 10, 3)
lambdas <- 1/Mu.vector
priors <- c(0.2, 0.65, 0.05, 0.1, 0.1)
chosen.lambda <- sample(lambdas, size = 1, prob = priors)
rexp(100, chosen.lambda)->random.nums
for(i in 1:100){
priors <- update.posterior(random.nums[i], lambdas, priors)
}
Now that we have confirmed that our code is correct, let’s move on to the optimal stopping problem with the one-step look ahead strategy. We would stop if the cost of holding on to the asset exceeds the cost of the expected value of the difference between the next offer and the current offer.
#' Simulate the asset problem with different distributions
#'
#' @param N the number of stages
#' @param lambdas the parameters for the exponential distributions from which the observations come from
#' @param priors the prior probability of the observations coming from each distribution
#' @param C the daily cost of holding on to the asset
#'
#' @return the simulated reward
#' @export
simulate.problem <- function(N, lambdas, priors, C){
chosen.lambda <- sample(lambdas, size = 1, prob = priors) #randomly draw the distribution
observations <- rexp(N, chosen.lambda) #draw the observation from the randomly chosen distribution
for(n in 1:N){
X <- observations[n]
priors <- update.posterior(X, lambdas, priors)
if(C >= (sum(1/lambdas*priors) - X)){ #stopping condition
return(list(X - (n-1)*C, observations, n, chosen.lambda))
}
}
return(X - (N-1)*C, observations, N, chosen.lambda) #if didn't stop until the end
}
#for the sake of comparison, let's compare the results to the case where we look at the first 37% of offers and select the first one that exceeds them.
#' Policy ii simulations, under this policy we first reject the first 37% of offers and select the first one that exceeds the first 37%.
#'
#' @param offers the list of offers
#' @param C the daily cost of holding onto the asset
#'
#' @return the simulated reward
#' @export
policy.ii <- function(offers, C){
threshold.value <-max(offers[1:(0.37*length(offers))])
remaining.offers <- offers[(0.37*length(offers) + 1):length(offers)]
first.better.offer <- remaining.offers[which(remaining.offers > threshold.value)][1]
if(is.na(first.better.offer)){
return(offers[length(offers)] - C*length(offers))
}
return(first.better.offer - which(offers==first.better.offer)*C)
}
lambdas <- c(600, 300, 800, 1000, 250)
lambdas <- 1/lambdas
priors <- c(0.2, 0.65, 0.05, 0.1, 0.1)
sim.lst <- simulate.problem(100, lambdas, priors, 0.05)
c() -> best.reward
c() -> optimal.policy.reward
c() -> sim.actual.diff
C <- 4
for(i in 1:10000){
sim.lst <- simulate.problem(100, lambdas, priors, C)
offers <- sim.lst[[2]]
best.reward <- c(best.reward, max(offers - seq(0, length(offers) - 1)*C))
sim.reward <- c(optimal.policy.reward, sim.lst[[1]])
sim.actual.diff <- c(sim.actual.diff, policy.ii(offers, C))
}
sim.rewards <- data.frame(sim.reward, best.reward, sim.actual.diff)
ggplot(sim.rewards) +
geom_density(aes(x=sim.actual.diff, fill="Secretary strategy"), color="black", alpha=0.8)+
geom_density(aes(x=sim.reward, fill="One-stage look ahead"), color="black", alpha=0.8)+
geom_density(aes(x=best.reward, fill="Best reward"), color="black", alpha=0.8)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+labs(title = "Asset problem/different distributions", x="Reward", y="Density", tag="A", fill="Reward")+
scale_fill_manual(values=c("Secretary strategy" = "#B8001F", "One-stage look ahead" = "#257180", "Best reward" = "#001F3F"))+
xlim(c(-1000, 5000))
On each attempt, a burglar is successful with probability \(p\). If successful, his loot will be \(j\) with probability \(P_j\), where \(P_j \geq 0\) and \(\sum_j P_j = 1\). If unsuccessful, he loses everything (goes to jail) and the problem ends. If the burglar is allowed to retire at any time and keep all the loot he has accumulated up to that time, what is his optimal policy?
This is a stopping problem in which the state at any time is the burglar’s total loot at that time. The optimality equation is \[ V(i) = \max \left( i, p \sum_{j=0}^{\infty} V(i+j) P_j \right), \quad i > 0. \] The one-stage look-ahead policy is to stop when in \(B\), \[ B = \left\{ i : i \geq p \sum_j (i+j) P_j \right\}, \] which simplifies to \[ B = \left\{ i : i \geq \frac{p}{1 - p} E(L) \right\}, \] where \(E(L) = \sum_j j P_j\) is the mean loot of a successful burglary, assumed to be finite.
Because the state cannot decrease (let the state be infinite if the burglar is caught), it follows that \(B\) is closed, and thus (assuming stability) the one-stage look-ahead policy is optimal.
#' Expected accumulated wealth function
#'
#' @param N the number of timepoints for theft
#' @param n the current stage
#' @param i the initial wealth
#' @param p the probability of successful theft at each stage
#' @param lambda the Poisson distribution parameter for the
#'
#' @return the expected accumulated wealth
#' @export
V <- function(N, n, i, p, lambda){
if(n == N){
return(i)
}
lower <- qpois(0.0001, lambda)
upper <- qpois(0.9999, lambda)
sum.hold <- 0
for(j in seq(lower, upper, 1)){
sum.hold <- sum.hold + V(N, n+1, i+j, p, lambda)*dpois(j, lambda)
}
sum.hold * p -> sum.hold
return(max(i, sum.hold))
}
#' Find the critical wealth to stop looting
#'
#' @param p the probability of succeeding at theft
#' @param i the initial wealth
#' @param lambda the Poisson distribution for the value of asset we are stealing
#'
#' @return the critical wealth value
#' @export
find.critical.loot <- function(p, i, lambda){
lower <- qpois(0.0001, lambda)
upper <- qpois(0.9999, lambda)
sum.hold <- 0
for(j in seq(lower, upper, 1)){
sum.hold <- sum.hold + (i+j)*dpois(j, lambda)
}
sum.hold * p -> sum.hold
return(sum.hold)
}
#' Simulate the burglar problem
#'
#' @param N the number of timepoints for theft
#' @param i the initial wealth
#' @param p the probability of successful theft at each stage
#' @param lambda the Poisson distribution parameter for the
#' @param tweaking.policy added this for the sake of comparison to optimal strategy, by default the variable is set to 0. If changed it will add the value stored in this variable to the critical wealth and no longer return the optimal simulated wealth.
#'
#' @return the simulated accumulated wealth
#' @export
simulate.problem <- function(N, i, p, lambda, tweaking.policy = 0){
for(n in 1:N){
find.critical.loot(p, i, lambda)->critical.loot
if(critical.loot>=(i+tweaking.policy)){
random.loot <- rpois(1, lambda)
successful.loot <- rbinom(1, 1, p)
if(successful.loot){
i <- random.loot + i
}else{
return(0)
}
}
}
return(i)
}
N = 6; initial.wealth = 3; lambda = 5
hold.theory <- c()
hold.simulations <- c()
hold.simulations.pi.1 <- c()
hold.simulations.pi.2 <- c()
for(p in seq(0, 1, 0.1)){
hold.theory <- c(hold.theory, V(N, 1, initial.wealth, p, lambda))
hold.simulations <- c(hold.simulations, mean(replicate(1000, simulate.problem(N,initial.wealth, p, lambda))))
hold.simulations.pi.1 <- c(hold.simulations.pi.1, mean(replicate(1000, simulate.problem(N,initial.wealth, p, lambda, tweaking.policy = 3))))
hold.simulations.pi.2 <- c(hold.simulations.pi.2, mean(replicate(1000, simulate.problem(N,initial.wealth, p, lambda, tweaking.policy = -3))))
}
ggplot()+
geom_point(aes(x=seq(0, 1, 0.1), y=hold.simulations, colour="Optimal policy sims"), shape=8, stroke=1.5)+
geom_line(aes(x=seq(0, 1, 0.1), y=hold.theory, colour="Optimal policy theory"), size=1)+
geom_point(aes(x=seq(0, 1, 0.1), y=hold.simulations.pi.1, colour="Policy I sims"), shape=8, stroke=1)+
geom_point(aes(x=seq(0, 1, 0.1), y=hold.simulations.pi.2, colour="Policy II sims"), shape=8, stroke=1)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+scale_color_manual(values=c("Optimal policy sims" = "#243642", "Optimal policy theory" = "#387478",
"Policy I sims" = "#7E60BF", "Policy II sims" = "#E4B1F0"))+
labs(tag = "A", title ="Burglar problem", x="Successful theft probability (p)", y="Accumulated wealth", colour="Policy")
The following variation is taken from:
Chen, K., & Ross, S. M. (2014). The burglar problem with multiple options. Naval Research Logistics (NRL), 61(5), 359-364.
Assume that the burglar faces multiple assets at each timepoint with a certain probability of a successful theft and rewards coming from an exponential distribution. The question is how do we change the Bellman equation in the previous problem to understand which asset type to burgle at each period in order to maximize the expected return.
#' Bellman equation function for the multi-option burglar problem
#'
#' @param n Number of timepoints for theft
#' @param r the current wealth of the burglar
#' @param q the vector containing the probability of successful theft for assets
#' @param lambda the vector containing the rate parameters for the exponential distribution from which the
#' reward comes from
#'
#' @return
#' @export
V <- function(n, r, q, lambda){
if(n == 0){
return(r)
}
hold.max <- -Inf
track.i <- 0
for(i in 1:length(lambda)){
integrand <- function(t){
return(lambda[i]*exp(-lambda[i]*t)*V(n-1, r+t, q, lambda)[[1]])
}
integrated.res <- q[i]*integrate(integrand, lower = 0, upper = Inf)$value
if(integrated.res > hold.max){
hold.max <- integrated.res
track.i <- i
}
}
return(list(max(r, hold.max), track.i))
}
Below, we show the expected return from the optimal policy for three periods only.
q <- c(0.5, 0.6, 0.75)
lambda <- c(1/10, 1/9, 1/4)
accumulated.wealth <- c()
tracking.i <- c()
for(r in seq(0, 100, 1)){
accumulated.wealth <- c(accumulated.wealth, V(3, r, q, lambda)[[1]])
tracking.i <- c(tracking.i, V(3, r, q, lambda)[[2]])
}
types <- length(q)
for(i in 1:types){
name.first <- paste0("first.", i)
name.last <- paste0("last.", i)
assign(name.first, which(tracking.i == i)[1])
assign(name.last, which(tracking.i == i)[length(which(tracking.i == i))])
}
ggplot()+
#geom_tile(aes(x = seq(0, 100, 1), y = 10, fill = factor(tracking.i)), height = 200)+
geom_rect(aes(xmin = first.1-1, xmax = last.1, ymin=0, ymax = 100, fill='1'))+
geom_rect(aes(xmin = first.2-1, xmax = last.2, ymin=0, ymax = 100, fill='2'))+
geom_rect(aes(xmin = first.3-1, xmax = last.3, ymin=0, ymax = 100, fill='3'))+
geom_line(aes(x=seq(0, 100, 1), y=accumulated.wealth), size=1.5)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+
scale_fill_manual(values=c('1' = '#BEAEE2', '2' = '#629584', '3' = '#CDF0EA'))+
labs(tag = "A", title = "N = 3; multi-option burglar problem", y='Expected return', x='Wealth (r)', fill='Asset type')
Consider the classic burglar problem with the change that his success probability changes over time. Specifically, suppose that on his \(i\)th burglary he will be successful with probability \(P_i\), \(i \geq 1\).
[(a)] Show that the structure of the optimal policy is as follows:
If the burglar has been successful on his first \((i - 1)\) burglaries, \(i \geq 1\), then the next one should be attempted if his total loot is less than \(a_i\), for suitably chosen constants \(a_i\), \(i \geq 1\).
[(b)] Give the one-stage look-ahead policy and determine conditions on the \(P_i\), \(i \geq 1\), that ensure its optimality. Under these conditions show that \(a_i\), \(i \geq 1\), is a monotone sequence.
For the example below, we assume that the asset we are burgling comes from an exponential distribution with mean \(\frac{1}{\lambda}\) and the probability of success at each stage comes from a truncated normal distribution.
#' The expected threshold for deciding to loot at each stage
#'
#' @param r the wealth of the burglar
#' @param lambda the parameter for the exponential distribution from which the asset comes from
#'
#' @return the expected critical loot value
#' @export
expected.next.loot <- function(r, lambda, mean = 0.7, sd = 0.1){
integrand <- function(t){
return(lambda*exp(-lambda*t) * (r+t))
}
integrated.res <- integrate(integrand, lower = 0, upper = Inf)$value
return(integrated.res*etruncnorm(a = 0, b = 1, mean = mean, sd = sd))
}
#set parameters
lambda <- 1 / 40
r.vals <- seq(0, 100, by = 10)
expected.loot.thresholds <- sapply(r.vals, function(r) expected.next.loot(r, lambda))
result.df <- data.frame(r = r.vals, LootThreshold = expected.loot.thresholds)
ggplot(result.df)+geom_line(aes(x=r.vals, y=LootThreshold), size=1.5, color ="#001F3F")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+labs(tag="A", x="r", y="Loot threshold", title=TeX("$a_i$ is a monotone sequence"))
Simulate the problem
#' Simulating the burglary problem with varying probability of success
#'
#' @param N the number of stages
#' @param r the burglar's wealth
#' @param lambda the parameter for the exponential distribution from which the asset comes from
#' @param tweaking.policy if you want to change the policy from optimal to a non-optimal policy simulation then change this parameter
#' @param mean the mean for the truncated normal distribution from which the probability of successful burgling comes from
#' @param sd the standard deviation for the truncated normal distribution from which the probability of successful burgling comes from
#'
#' @return the simulated wealth
#' @export
simulate.problem <- function(N, r, lambda, tweaking.policy = 0,mean=0.7, sd=0.2){
for(n in 1:N){ #go over all stages
expected.next.loot(r, lambda)->critical.loot
if(critical.loot>=(r+tweaking.policy)){
random.loot <- rexp(1, lambda)
p <- rtruncnorm(1, a = 0, b = 1, mean = mean, sd = sd)
successful.loot <- rbinom(1, 1, p)
if(successful.loot){
r <- random.loot + r
}else{ #if caught
return(0)
}
}
}
return(r)
}
#To run this fast, we simply take the expected values of the distributions
#' Title
#'
#' @param N the number of stages
#' @param r the burglar's wealth
#' @param lambda the parameter for the exponential distribution from which the asset comes from
#' @param mean the mean for the truncated normal distribution from which the probability of successful burgling comes from
#' @param sd the standard deviation for the truncated normal distribution from which the probability of successful burgling comes from
#' @return the expected accumulated wealth
#' @export
V <- function(N, r, lambda, mean=0.7, sd=0.2){
if(N == 0){
return(r)
}
final.integration.res <- etruncnorm(a = 0, b = 1, mean = 0.7, sd = 0.2)*V(N-1, r + 1/lambda, lambda)
return(max(final.integration.res, r))
}
varying.success.prob <- c()
simulation.probs <- c()
for(r in seq(0, 100, 10)){
varying.success.prob <- c(varying.success.prob, V(50, r, 1/40))
simulation.probs <- c(simulation.probs, mean(replicate(1000, simulate.problem(50, r, 1/40))))
}
ggplot()+
geom_line(aes(x=seq(0, 100, 10), y=varying.success.prob), color="#3A6D8C", size=1.5)+
geom_point(aes(x=seq(0, 100, 10), y=simulation.probs), color="#001F3F", stroke=1.25, shape=8)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
)+labs(tag="B", title="Burglary with varying success probability\n N=50", x="r", y="Expected theft")
In a certain fishing location there are \(N\) types of fish; and each time a fish is caught, then, independent of the types of those previously caught, it will be of type \(i\) with probability \(P_i, \; i = 1, \ldots, N\). At each stage the decision maker can either pay a cost \(C\) and fish during that time period or he can quit. If he fishes, he will catch a fish with probability \(\alpha\); and if he quits, he will receive a terminal reward \(R(n)\) if he has caught \(n\) distinct types of fish. Let us determine the optimal policy.
The state at any time is the set \(S\) of distinct types of fish that have been caught. The one-stage look-ahead policy would stop at \(S\) if \(S \in B\), where
\[ B = \{S : R(|S|) \geq \alpha \bar{P}(S) R(|S| + 1) + \left[ 1 - \alpha \bar{P}(S) \right] R(|S|) - C \}, \]
where
\[ \bar{P}(S) = \sum_{i \notin S} P_i, \]
and \(|S|\) denotes the number of elements in \(S\). We can rewrite \(B\) as
\[ B = \{S : R(|S| + 1) - R(|S|) \leq \frac{C}{\alpha \bar{P}(S)} \}. \]
If we now suppose that \(R(n)\) is a concave function, that is, \(R(n+1) - R(n)\) decreases in \(n\), then because \(S\) can only become larger [and thus \(R(|S| + 1) - R(|S|)\) can only decrease, whereas \(\frac{C}{\alpha \bar{P}(S)}\) can only increase] it follows that \(B\) is closed. Hence, the one-stage policy is optimal when \(R\) is concave.
#' P bar
#'
#' @param unique.types the unique types of fish caught so far
#' @param lambda the lambda parameter for the Poisson distribution from which the fish type is taken from
#'
#' @return P bar
#' @export
P_bar <- function(unique.types, lambda){
lower <- qpois(0.000000001, lambda)
upper <- qpois(0.999999999, lambda)
return(sum(dpois(setdiff(seq(lower, upper, 1), unique.types), lambda)))
}
#' Reward function - defined as concave
#'
#' @param x the number of unique fish caught
#'
#' @return the reward
#' @export
R <- function(x){
return(100*sqrt(x))
}
Let’s start with the simulations first.
#' Simulate the fishing problem
#'
#' @param N the number of stages
#' @param alpha the probability of successfully catching a fish
#' @param lambda the parameter for the Poisson distribution from which the type of the fish comes from
#' @param C the cost of fishing at each stage
#'
#' @return the simulated return
#' @export
simulate.fishing.problem <- function(N, alpha, lambda, C){
cost <- 0
stopped <- FALSE
types <- c()
stages <- 1
while(!stopped && stages <= N){
unique.types <- unique(types)
if(length(unique.types)>=1 && ((R(length(unique.types)+1) - R(length(unique.types))) <= C/(alpha*P_bar(unique.types, lambda)))){
stopped <- TRUE
}
cost <- C + cost
stages <- stages + 1
probability.success.fish <- rbinom(1, 1, alpha)
if(probability.success.fish){
types <- c(types, (rpois(1, lambda)))
}
}
return(R(length(unique(types))) - cost)
}
#' The optimality equation function for the fishing problem
#'
#' @param N the number of stages
#' @param S the number of unique fish types caught so far
#' @param alpha the probability of catching a fish successfully
#' @param lambda the parameter for the Poisson distribution from which the type of the fish comes from
#' @param C the cost of fishing at each stage
#'
#' @return the expected return
#' @export
V <- function(N, S, alpha, lambda, C) {
#Base case, you have no stages left to fish your reward depends on how many unique fish types you have caught.
if (N == 0) {
return(R(S))
}
unique.types <- 0:(S - 1) #Let's store fish types up to this point in a variable
P.new <- P_bar(unique.types, lambda) #Probability of catching a new fish
#We have two actions:
#I. We stop and receive a reward based on the number of unique fish we have caught
#II. We continue and we either fail to successfully catch a fish or we fail to catch a unique fish OR we catch a unique fish successfully.
return(max(R(S), -C + alpha * P.new * V(N - 1, S + 1, alpha, lambda, C) + (alpha * (1 - P.new) + (1 - alpha)) * V(N - 1, S, alpha, lambda, C)))
}
theory.vector <- c()
sim.vector <- c()
lambda <- 3
for(C in seq(0,40 ,1)){
c(theory.vector, V(10, 0, 0.8, lambda, C)) -> theory.vector
c(sim.vector, mean(replicate(1000, simulate.fishing.problem(10, 0.8, lambda, C)[[1]]))) -> sim.vector
}
results <- data.frame(cost = seq(0, 40, 1), theory = theory.vector, simulations = sim.vector)
ggplot(results)+
geom_line(aes(x=cost, y=theory, colour="Theory"), size=1.5)+
geom_point(aes(x=cost, y=simulations, colour="Sims"), shape=8, stroke=1.5)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18))+
scale_color_manual(values=c("Theory" = "#387478", "Sims" = "#243642"))+
labs(x="Cost (C)", y= "Reward", title = "Fishing N=10", colour = "", tag="A")
We start a distance of \(N\) parking places from our destination. As we drive along we can see only one parking place at a time. Each parking place is, independently, free with probability \(p\). Determine the policy that minimizes the expected distance we need to walk.
For the problems below, we are consider a cost of \(C\) if we fail to find a parking at the end. Let’s first start by defining the Bellman equation. The boundary condition would be passing all the available parking slots and incurring a very large cost \(C\) (\(V_n(0)=C\)). The bellman equation can be written as \(p min[V_{n}(i-1), i] + (1-p)V_{n}(i-1)\).
#' Optimality equation
#'
#' @param N number of stages
#' @param i
#' @param p the probability of a parking slot being empty at each stage
#' @param C the cost that we incur if we fail to park our car
#'
#' @return the expected cost
#' @export
V <- function(N, i, p, C){
if(i==0){ #boundary condition
return(C)
}
return(p*min(i, V(N, i-1, p, C)) + (1-p)*V(N, i-1, p, C))
}
To simulate the problem, we use a one-stage look ahead policy where we consider the expected cost of continuing to search for parking slots vs stopping.
#' Function for finding the threshold
#'
#' @param N number of stages
#' @param i
#' @param p the probability of a parking slot being empty at each stage
#' @param C the cost that we incur if we fail to park our car
#'
#' @return the calculated threshold for the one stage look-ahead policy
#' @export
cost.threshold <- function(N, i, p, C){
if(i==0){
return((1-p)*(C))
}
return(p*i+(1-p)*cost.threshold(N, i-1, p, C))
}
#' Simulation function for expected cost
#'
#' @param N number of stages
#' @param i
#' @param p the probability of a parking slot being empty at each stage
#' @param C the cost that we incur if we fail to park our car
#'
#' @return the simulated cost
#' @export
simulate.problem <- function(N, i, p, C){
stages <- rev(seq(0, N, 1))
for(i in seq_along(stages)){
threshold <- cost.threshold(N, stages[i], p, C) #one stage look ahead policy
if(stages[i] <= threshold){
if(rbinom(1, 1, p)){
return(stages[i])
}
}
}
return(C)
}
C <- 100
N <- 10
sim.vector <- c()
theory.vector <- c()
for(p in seq(0.1, 0.9, 0.05)){
sim.vector<- c(sim.vector, mean(replicate(1000, simulate.problem(N, N, p, C))))
theory.vector <- c(theory.vector, V(N, N, p, C))
}
ggplot()+geom_line(aes(x=seq(0.1, 0.9, 0.05), y=theory.vector), size=1.5, color="#D96098")+
geom_point(aes(x=seq(0.1, 0.9, 0.05), y=sim.vector), stroke=1.5, shape=8, color="#D96098")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+
labs(title="Parking problem, N=10", x="Probability of empty parking slot", y="Cost")
An object is located in one of \(n\) possible locations, being in location \(i\) with probability \(P_i^0\), \(i = 1, \ldots, n\), \(\sum_i P_i^0 = 1\). A search of location \(i\) costs \(C_i\), \(C_i > 0\), and if the object is present, the probability that it will be discovered is \(\alpha_i\), \(i = 1, \ldots, n\). That is, \(1 - \alpha_i\) is the overlook probability for location \(i\). The objective is to discover the object at minimal expected cost.
This is a decision process whose state at any time is the posterior probability vector \(\mathbf{P} = (P_1, \dots, P_n)\), with \(P_i\) representing the posterior probability, given all that has occurred, that the object is in box \(i\). Letting \(V(\mathbf{P})\) denote the minimal expected cost function, then the optimality equation is given by
\[ V(\mathbf{P}) = \min_i \left[ C_i + (1 - \alpha_i P_i) V(T_i(\mathbf{P})) \right], \]
where \(T_i(\mathbf{P}) = \left[(T_i(\mathbf{P}))_1, \dots, (T_i(\mathbf{P}))_n \right]\) is the vector of posterior probabilities given the prior probability vector \(\mathbf{P}\) and given that a search of location \(i\) was unsuccessful. Hence,
\[ (T_i(\mathbf{P}))_j = P(\text{in } j \mid \text{search of } i \text{ unsuccessful}) \]
\[ = \begin{cases} \frac{P_j}{1 - \alpha_i P_i}, & j \neq i, \\ \frac{P_i (1 - \alpha_i)}{1 - \alpha_i P_i}, & j = i. \end{cases} \]
For a given state \(\mathbf{P}\), a policy can be regarded as a sequence of locations \((i_1, i_2, \dots)\) with the interpretation that the locations are searched in that order until the object is found.
#' Updating the posterior probability
#'
#' @param P the prior probability vector of the item being located at any location
#' @param i the location we are searching
#' @param alpha the probability of finding the item if it is present at that location
#'
#' @return the updated posterior probability of finding the item
#' @export
T_P <- function(P, i, alpha){
for(j in 1:length(P)){
if(i==j){
P[j] = P[i]*(1-alpha[i])/(1-alpha[i]*P[i])
}else{
P[j] = P[j]/(1-alpha[i]*P[i])
}
}
return(P)
}
#' The optimality equation
#'
#' @param N the number of stages
#' @param C the vector containing the cost of searching one location
#' @param alpha the vector of successfully finding the item at the location, if the item is located there
#' @param P the vector containing the probability of the item being located at a specific location
#'
#' @return the expected cost
#' @export
V <- function(N, C, alpha, P){
if(N==0){ #boundary condition, no stages left, we cannot search anymore
return(0)
}
hold.min <- Inf
for(i in 1:length(alpha)){ #find out which location we should search to minimize the cost
val <- C[i]+(1-alpha[i]*P[i])*V(N-1, C, alpha, T_P(P, i, alpha))
if(val < hold.min){
hold.min <- val
}
}
return(hold.min)
}
The optimal strategy is to search the location with the highest \(\max_i \frac{\alpha_i P_i}{C_i}\)
#' The simulation function
#'
#' @param N the number of stages
#' @param C the vector containing the cost of searching one location
#' @param alpha the vector of successfully finding the item at the location, if the item is located there
#' @param P the vector containing the probability of the item being located at a specific location
#'
#' @return the simulated cost
#' @export
simulate.problem <- function(N, C, alpha, P){
total.cost <- 0
actual.location <- sample(seq(1, length(P), 1), prob = P)[1] #place the item at one location randomly
for(n in 1:N){
search.location <- which.max(alpha*P/C) #find out where to search
total.cost <- total.cost + C[search.location]
found.item <- rbinom(1, 1, alpha[search.location])*(actual.location == search.location)
if(found.item==1){ #if the item is found, stop searching
return(total.cost)
}else{ #otherwise update the posterior probabilities
P <- T_P(P, search.location, alpha)
}
}
return(total.cost)
}
N <- 10
C <- c(10, 15, 20)
alpha <- c(0.7, 0.8, 0.65)
P_1 <- 0.3
V.vector <- c()
sim.vector <- c()
for(P_2 in seq(0.1, 0.6, 0.05)){
P <- c(P_1, P_2, 1-(P_1+P_2))
V.vector<- c(V.vector, V(N, C, alpha, P))
sim.vector <- c(sim.vector, mean(replicate(1000, simulate.problem(N, C, alpha, P))))
}
ggplot()+geom_point(aes(x=seq(0.1, 0.6, 0.05), y=sim.vector), shape=8, stroke=2, color = "#D96098")+
geom_line(aes(x=seq(0.1, 0.6, 0.05), y=V.vector), size=1.5, color = "#F7DBF0")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) + labs(tag="A", title=TeX("Optimal search; $P_1$ = 0.3, N = 10"), y="Cost", x=TeX("$p_2$"))
Now suppose that finding the item at a particular location \(i\) will result in a reward of \(R_i\). Let us simulate this problem under the optimal policy and compare it to the Bellman equation
#' The optimality equation
#'
#' @param N the number of stages
#' @param C the vector containing the cost of searching one location
#' @param alpha the vector of successfully finding the item at the location, if the item is located there
#' @param P the vector containing the probability of the item being located at a specific location
#' @param R the vector containing the reward for finding the item in a specific location
#'
#' @return the expected cost
#' @export
V <- function(N, C, alpha, P, R){
if(N==0){ #boundary condition
return(0)
}
hold.min <- Inf
for(i in 1:length(alpha)){
val <- C[i]- alpha[i]*P[i]*R[i] + (1-alpha[i]*P[i])*V(N-1, C, alpha, T_P(P, i, alpha), R)
if(val < hold.min){
hold.min <- val
}
}
return(min(0, hold.min))
}
#' Simulate the problem
#'
#' @param N the number of stages
#' @param C the vector containing the cost of searching one location
#' @param alpha the vector of successfully finding the item at the location, if the item is located there
#' @param P the vector containing the probability of the item being located at a specific location
#' @param R the vector containing the reward for finding the item in a specific location
#'
#' @return the simulated cost
#' @export
simulate.problem <- function(N, C, alpha, P, R){
total.cost <- 0
actual.location <- sample(seq(1, length(P), 1), prob = P)[1]
for(n in 1:N){
if(sum(P*alpha*R>=C)==0){ #stop if there is no point in continuing; for all of the
#locations the cost of continuing is more than the expected reward
return(total.cost)
}
search.location <- which.max(alpha*P/C)
total.cost <- total.cost + C[search.location]
found.item <- rbinom(1, 1, alpha[search.location])*(actual.location == search.location)
if(found.item==1){
return(total.cost - R[search.location])
}else{
P <- T_P(P, search.location, alpha) #updated posterior probability
}
}
return(total.cost)
}
#set parameters
N <- 10
C <- c(10, 15, 20)
alpha <- c(0.7, 0.8, 0.65)
P_1 <- 0.3
P_2 <- 0.2
P <- c(P_1, P_2, 1-(P_1+P_2))
R <- c(600, 500, 1000)
V.vector <- c()
sim.vector <- c()
for(P_2 in seq(0.1, 0.6, 0.05)){
P <- c(P_1, P_2, 1-(P_1+P_2))
V.vector<- c(V.vector, V(N, C, alpha, P, R))
sim.vector <- c(sim.vector, mean(replicate(1000, simulate.problem(N, C, alpha, P, R))))
}
ggplot()+geom_point(aes(x=seq(0.1, 0.6, 0.05), y=sim.vector), shape=8, stroke=2, color = "#D96098")+
geom_line(aes(x=seq(0.1, 0.6, 0.05), y=V.vector), size=1.5, color = "#F7DBF0")+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size = 20),
axis.text = element_text(size = 15),
text = element_text(size = 18)
) + labs(tag="B", title=TeX("Optimal search with rewards; $P_1$ = 0.3, N = 10"), y="Cost", x=TeX("$p_2$"))
Suppose a gambler enters a casino with a fortune of \(i\) and has the option of betting any positive integer amount from 0 to his current fortune (\(0 \l j \leq i\)). At each stage he has a probability \(p\) of winning the bet and earning \(j\) fortune and a probability \(1-p\) of losing \(j\). His goal is to reach a specific wealth \(N\). The question is what strategy maximizes his chance of reaching a fortune of \(N\) before going broke.
Timid strategy is defined as betting only 1 unit at each stage and a bold strategy is defined as betting his entire fortune or \(N-i\) at every stage.
Below we show that the optimal strategy would be to play timidly when the probability of winning the bet is greater than \(0.5\) and to play boldly otherwise.
#' Simulation function for gambling
#'
#' @param i the gambler's current fortune
#' @param N the amount the gambler wants to reach
#' @param p the probability of winning a bet
#' @param strategy either 'timid' or 'bold'
#'
#' @return the vector containing the gambler's wealth at every stage
#' @export
play.gambling.casino <- function(i, N, p, strategy){
broke <- FALSE
happy <- FALSE
wealth <- c(i)
while(!broke && !happy){ #process continues until the gambler is happy with his fortune reaching N units or goes broke
prob <- rbinom(1, 1, p)
if(strategy == "bold"){
bet <- if(i<=N/2)i else N-i
}else if(strategy=="timid"){
bet <- 1
}else{
stop("Invalid strategy provided. Use 'bold' or 'timid'.")
}
if(prob){
wealth <- c(wealth, wealth[length(wealth)] + bet)
}else{
wealth <- c(wealth, wealth[length(wealth)] - bet)
}
if(wealth[length(wealth)]==0){
broke<-TRUE
}else if(wealth[length(wealth)]==N){
happy<-TRUE
}
}
return(wealth)
}
#' Function for finding the probability of reaching a fortune of $N$
#'
#' @param i the gambler's current fortune
#' @param N the amount the gambler wants to reach
#' @param p the probability of winning a bet
#' @param strategy either 'timid' or 'bold'
#'
#' @return the probability of reaching fortune N
#' @export
U <- function(i, N, p, strategy){
q <- 1-p
if(i==0){
return(0)
}
if(strategy == "bold"){
if(i==N){
return(1)
}
if(N==0){
return(0)
}
if(i<=N/2) {return(p*U(2*i, N-1, p,strategy))} else {return(p+(1-p)*U(2*i-N,N-1, p, strategy))}
}else if(strategy == "timid"){
if(p==1/2) {return(i/N)} else {return((1-(q/p)^i)/(1-(q/p)^N))}
}else{
stop("Invalid strategy provided. Use 'bold' or 'timid'.")
}
}
Compare simulations and theory
N <- 20
i <- 10
p.seq <- seq(0, 1, 0.01)
num.simulations <- 1000
results.df <- data.frame(P = numeric(), BoldSimulation = numeric(), BoldTheory = numeric(), TimidSimulation = numeric(), TimidTheory = numeric())
for(p in p.seq){
timid.sims <- replicate(num.simulations, tail(play.gambling.casino(i, N, p, "timid"), 1))
timid.sims.res <- (sum(timid.sims == N))/num.simulations
timid.theory.res <- U(i, N, p, "timid")
bold.sims <- replicate(num.simulations, tail(play.gambling.casino(i, N, p, "bold"), 1))
bold.sims.res <- (sum(bold.sims == N))/num.simulations
bold.theory.res <- U(i, N, p, "bold")
results.df <- rbind(results.df, data.frame(P = p, BoldSimulation = bold.sims.res, BoldTheory = bold.theory.res, TimidSimulation = timid.sims.res, TimidTheory = timid.theory.res))
}
ggplot(results.df)+
geom_rect(aes(xmin=0, xmax=0.5, ymin=0, ymax=1), fill="#C4E1F6")+
geom_rect(aes(xmin=0.5, xmax=1, ymin=0, ymax=1), fill="#E2F1E7")+
geom_point(aes(x=P, y=BoldSimulation, colour="Bold sims"), shape=8, stroke=1.5)+
geom_point(aes(x=P, y=TimidSimulation, colour = "Timid sims"), shape=8, stroke=1.5)+
geom_line(aes(x=P, y=BoldTheory, colour = "Bold theory"), size=1.5)+
geom_line(aes(x=P, y=TimidTheory, colour = "Timid theory"), size=1.5)+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
plot.title = element_text(hjust = 0.5, size=20),
axis.text = element_text(size = 15),
text = element_text(size=18))+
scale_color_manual(values=c("Bold sims" = "#629584", "Timid sims" = "#789DBC", "Bold theory" = "#629584", "Timid theory" = "#789DBC"))+
labs(x="Probability of winning", y="Probability of reaching N", title="N=20, i=10", colour="Strategy")