```

References

Design of the game

Guessing correctly the true probability \(\theta\) of the Bernoulli random number generator will earn us \(n\) dollars. We are presented with two possible \(\theta\) values. The game algorithm picks one of them randomly with either value having a selection probability of one half. This true \(\theta\) generates the random draw data behind the scene without us knowing. Thus, we already know that true \(\theta \in \{\theta_1, \theta_2\}\) (assume \(\theta_1 < \theta_2\)), but do not know which one of these is the true underlying \(\theta\). Because of this game algorithm, a flat prior (probability 1/2 for \(\theta_1\) and \(\theta_2\)) is justifiable (and quite objective). Thus, we can just work with the likelihood to determine the posterior probability of \(\theta_1\) and \(\theta_2\) given observation.

Each look at a generated number \(X_i \sim Bernoulli(\theta)\) costs $1, but gives more empirical evidence for either one of \(\theta_1\) or \(\theta_2\) is more likely. We want to optimize the strategy to maximize the payoff including the reward and cost of looks. Let \(i\) be the number of 1’s observed and \(j\) be the number of 0’s observed. The cost of observation is, thus, \(i+j\) dollars.

The likelihood of specific \(\theta\) given this observation is \(L(i,j,\theta) \propto \theta^i (1 - \theta)^j\). We pick either one of \(\theta_1\) or \(\theta_2\) that give higher likelihood. If normalized, \(L(i,j,\theta_1)\) and \(L(i,j,\theta_2)\) add up to one given the same \((i,j)\). Due to the design of the game that makes the flat prior reasonable, these can be interpreted as the true probability. For example, \(P(\theta = \theta_1 | i, j) = L(i,j,\theta_1) / [L(i,j,\theta_1) + L(i,j,\theta_2)]\).

We can only observe up to \(n\) random draws. That is, \(i + j \le n\). At each \((i,j)\) combination where \(i + j < n\) there is a binary decision to keep observing more data points or stop there and decide based on observation \((i,j)\). Therefore, the action space can be very large. Including \((i,j) = (0,0)\), i.e., deciding without observing any data, there are \(\frac{n(n+1)}{2}\) binary nodes, giving \(2^{\frac{n(n+1)}{2}}\) patterns.

The size of decision space makes it difficult to simply brute-force the utility function. A dynamic programming approach can help by breaking down the problem into a smaller manageable pieces. This can be implemented as a recursive function. As queries with the identical set of arguments occur many times, computational burden can be reduced by memoization (caching) of results. Here it was implemented as a functional closure having a fixed-size array as a cache storage in the enclosing environment.

Load packages

library(tidyverse)
library(assertthat)

Algorithm

## Posterior function given data (i,j) at specific theta
posterior <- function(i, j, theta) {
    theta^i * (1 - theta)^j
}


## Maximized expected utility given observed data and decision to stop (1) or not (0)
construct_E_U <- function(n, theta1, theta2) {

    ## Create a saved data object in the enclosing environment
    saved <- array(NA,
                   dim = c(n, n, 2),
                   dimnames = list(i = as.character(seq_len(n) - 1),
                                   j = as.character(seq_len(n) - 1),
                                   stop = c("0","1")))

    ## Define a function
    fun <- function(i, j, stop) {
        ## Sanity check
        assert_that(length(i) == length(j))
        assert_that(length(j) == length(stop))
        assert_that(all(i + j < n))
        assert_that(all(i >= 0))
        assert_that(all(j >= 0))
        assert_that(all(stop %in% c(0,1)))

        ## If arguments are vector, loop over them
        if (length(i) > 1) {
            out <- mapply(FUN = fun, i = i, j = j, stop = stop)
            return(as.numeric(out))
        }

        ## Handle computation by conditions
        if (!is.na(saved[i + 1, j + 1, stop + 1])) {
            ## If saved data exists, used it
            out <- saved[i + 1, j + 1, stop + 1]

        } else if (stop == 1) {
            ## Stopping invokes a base case although (i,j) varies
            ## Cost + maximized expected payoff
            out <- -1 * (i + j) +
                n * max(posterior(i, j, theta1), posterior(i, j, theta2)) /
                ## Normalize to two point distribution
                sum(posterior(i, j, theta1), posterior(i, j, theta2))

        } else if (i + j == n - 1 & stop == 0) {
            ## Recursion base case if continuing but only one draw left
            ## Calculate normalized posterior probability of theta1 and theta2 at (i,j)
            p_theta1 <- posterior(i, j, theta1) / sum(posterior(i, j, theta1), posterior(i, j, theta2))
            p_theta2 <- posterior(i, j, theta2) / sum(posterior(i, j, theta1), posterior(i, j, theta2))
            ## Calculate posterior expected theta at (i,j)
            E_theta <- (theta1 * p_theta1) + (theta2 * p_theta2)
            ## Cost + maximized expected payoff (two possibilities)
            out <- -1 * (i + j + 1) +
                ## If 1 is drawn
                n * E_theta       * max(posterior(i + 1, j, theta1), posterior(i + 1, j, theta2)) /
                sum(posterior(i + 1, j, theta1), posterior(i + 1, j, theta2)) +
                ## If 0 is drawn
                n * (1 - E_theta) * max(posterior(i, j + 1, theta1), posterior(i, j + 1, theta2)) /
                sum(posterior(i, j + 1, theta1), posterior(i, j + 1, theta2))

        } else if (i + j <= n - 1 & stop == 0) {
            ## Non-base case if continuing (recursive calls)
            ## https://stat.ethz.ch/R-manual/R-devel/library/base/html/Recall.html
            ## Calculate normalized posterior probability of theta1 and theta2 at (i,j)
            p_theta1 <- posterior(i, j, theta1) / sum(posterior(i, j, theta1), posterior(i, j, theta2))
            p_theta2 <- posterior(i, j, theta2) / sum(posterior(i, j, theta1), posterior(i, j, theta2))
            ## Calculate posterior expected theta at (i,j)
            E_theta <- theta1 * p_theta1 + theta2 * p_theta2
            ## Maximized utility if 1 is drawn
            out <-
                E_theta       * max(Recall(i + 1, j, stop = 1), Recall(i + 1, j, stop = 0)) +
            ## Maximized utility if 0 is drawn
                (1 - E_theta) * max(Recall(i, j + 1, stop = 1), Recall(i, j + 1, stop = 0))

        } else {
            stop("Abnormal (i, j, stop) value")

        }

        ## Save expected utility in the enclosing environment
        saved[i + 1, j + 1, stop + 1] <<- out

        ## Return value
        return(out)
    }

    ## Add additional class to associate a print method
    class(fun) <- c("E_U", class(fun))
    ## Return the constructed closure
    return(fun)
}

## Decision that maximizes expected utility at (i,j)
argmax_d_E_U <- function(i, j) {
    assert_that(length(i) == length(j))

    ## Vectorize if arguments are vectors
    if (length(i) > 1) {
        out <- mapply(FUN = argmax_d_E_U, i = i, j = j)
        return(as.numeric(out))
    }

    ## If stop is better gives 1, if continuting is better gives 0
    which.max(c(E_U(i, j, stop = 0), E_U(i, j, stop = 1))) - 1
}

## Function to print stored data
print.E_U <- function(x, ...) {
    cat("### Showing saved data\n")
    ## http://adv-r.had.co.nz/Environments.html
    out <- get("saved", envir = environment(x))
    print(out)
    invisible(out)
}

Experiment with \(n = 2, \theta_1 = 0.25, \theta_2 = 0.75\)

## Construct for a very small action space
E_U <- construct_E_U(n = 2, theta1 = 0.25, theta2 = 0.75)
## Create a data frame representing nodes and decisions (continue vs stop)
data1 <- expand.grid(i = c(0:1),
                     j = c(0:1),
                     stop = c(0,1)) %>%
    subset(i + j <= 1) %>%
    arrange(i, j, stop)
## Calculate expected utilities
data1$E_U <- with(data1, E_U(i = i, j = j, stop = stop))
## Determine best decisions
data1$d_optim <- with(data1, argmax_d_E_U(i = i, j = j))
## Show data
data1
##   i j stop  E_U d_optim
## 1 0 0    0  0.5       1
## 2 0 0    1  1.0       1
## 3 0 1    0 -0.5       1
## 4 0 1    1  0.5       1
## 5 1 0    0 -0.5       1
## 6 1 0    1  0.5       1
## Show saved internal data in the enclosing environment
E_U
## ### Showing saved data
## , , stop = 0
## 
##    j
## i      0    1
##   0  0.5 -0.5
##   1 -0.5   NA
## 
## , , stop = 1
## 
##    j
## i     0   1
##   0 1.0 0.5
##   1 0.5  NA
## Plot expected utilities under continue (0) or stop (1) at (i,j)
ggplot(data = data1, mapping = aes(x = i, y = j,
                                   color = E_U)) +
    geom_point(size = 5) +
    facet_grid(. ~ stop) +
    scale_color_gradient2(low = "blue", high = "red") +
    theme_bw() + theme(legend.key = element_blank())

## Indicate optimal decision at each (i,j)
ggplot(data = unique(data1[c("i","j","d_optim")]),
       mapping = aes(x = i, y = j, label = d_optim)) +
    geom_text() +
    theme_bw() + theme(legend.key = element_blank())

Experiment with \(n = 10, \theta_1 = 0.25, \theta_2 = 0.75\)

## Construct for a moderate-sized action space
E_U <- construct_E_U(n = 10, theta1 = 0.25, theta2 = 0.75)
## Create a data frame representing nodes and decisions (continue vs stop)
data1 <- expand.grid(i = c(0:9),
                     j = c(0:9),
                     stop = c(0,1)) %>%
    subset(i + j <= 9) %>%
    arrange(i, j, stop)
## Calculate expected utilities
data1$E_U <- with(data1, E_U(i = i, j = j, stop = stop))
## Determine best decisions
data1$d_optim <- with(data1, argmax_d_E_U(i = i, j = j))
## Plot expected utilities under continue (0) or stop (1) at (i,j)
ggplot(data = data1, mapping = aes(x = i, y = j,
                                   color = E_U)) +
    geom_point(size = 5) +
    facet_grid(. ~ stop) +
    scale_color_gradient2(low = "blue", high = "red") +
    theme_bw() + theme(legend.key = element_blank())

## Indicate optimal decision at each (i,j)
ggplot(data = unique(data1[c("i","j","d_optim")]),
       mapping = aes(x = i, y = j, label = d_optim)) +
    geom_text() +
    theme_bw() + theme(legend.key = element_blank())

In this example, the optimum decision is to stop after 1 draw and choose the more likely value based on this single draw.

Experiment with \(n = 30, \theta_1 = 0.25, \theta_2 = 0.75\)

## Construct for a moderate-sized action space
E_U <- construct_E_U(n = 30, theta1 = 0.25, theta2 = 0.75)
## Create a data frame representing nodes and decisions (continue vs stop)
data1 <- expand.grid(i = c(0:29),
                     j = c(0:29),
                     stop = c(0,1)) %>%
    subset(i + j <= 29) %>%
    arrange(i, j, stop)
## Calculate expected utilities
data1$E_U <- with(data1, E_U(i = i, j = j, stop = stop))
## Determine best decisions
data1$d_optim <- with(data1, argmax_d_E_U(i = i, j = j))
## Plot expected utilities under continue (0) or stop (1) at (i,j)
ggplot(data = data1, mapping = aes(x = i, y = j,
                                   color = E_U)) +
    geom_point(size = 5) +
    facet_grid(. ~ stop) +
    scale_color_gradient2(low = "blue", high = "red") +
    theme_bw() + theme(legend.key = element_blank())

## Indicate optimal decision at each (i,j)
ggplot(data = unique(data1[c("i","j","d_optim")]),
       mapping = aes(x = i, y = j, label = d_optim)) +
    geom_text() +
    theme_bw() + theme(legend.key = element_blank())

In this example, the optimum decision appears to be to stop if the count of 1 or 0 is greater by 2 than the other.

Experiment with \(n = 30, \theta_1 = 0.50, \theta_2 = 0.65\)

## Construct for a moderate-sized action space
E_U <- construct_E_U(n = 30, theta1 = 0.50, theta2 = 0.65)
## Create a data frame representing nodes and decisions (continue vs stop)
data1 <- expand.grid(i = c(0:29),
                     j = c(0:29),
                     stop = c(0,1)) %>%
    subset(i + j <= 29) %>%
    arrange(i, j, stop)
## Calculate expected utilities
data1$E_U <- with(data1, E_U(i = i, j = j, stop = stop))
## Determine best decisions
data1$d_optim <- with(data1, argmax_d_E_U(i = i, j = j))
## Plot expected utilities under continue (0) or stop (1) at (i,j)
ggplot(data = data1, mapping = aes(x = i, y = j,
                                   color = E_U)) +
    geom_point(size = 5) +
    facet_grid(. ~ stop) +
    scale_color_gradient2(low = "blue", high = "red") +
    theme_bw() + theme(legend.key = element_blank())

## Indicate optimal decision at each (i,j)
ggplot(data = unique(data1[c("i","j","d_optim")]),
       mapping = aes(x = i, y = j, label = d_optim)) +
    geom_text() +
    theme_bw() + theme(legend.key = element_blank())

Experiment with \(n = 30, \theta_1 = 0.80, \theta_2 = 0.95\)

## Construct for a moderate-sized action space
E_U <- construct_E_U(n = 30, theta1 = 0.70, theta2 = 0.95)
## Create a data frame representing nodes and decisions (continue vs stop)
data1 <- expand.grid(i = c(0:29),
                     j = c(0:29),
                     stop = c(0,1)) %>%
    subset(i + j <= 29) %>%
    arrange(i, j, stop)
## Calculate expected utilities
data1$E_U <- with(data1, E_U(i = i, j = j, stop = stop))
## Determine best decisions
data1$d_optim <- with(data1, argmax_d_E_U(i = i, j = j))
## Plot expected utilities under continue (0) or stop (1) at (i,j)
ggplot(data = data1, mapping = aes(x = i, y = j,
                                   color = E_U)) +
    geom_point(size = 5) +
    facet_grid(. ~ stop) +
    scale_color_gradient2(low = "blue", high = "red") +
    theme_bw() + theme(legend.key = element_blank())

## Indicate optimal decision at each (i,j)
ggplot(data = unique(data1[c("i","j","d_optim")]),
       mapping = aes(x = i, y = j, label = d_optim)) +
    geom_text() +
    theme_bw() + theme(legend.key = element_blank())

Experiment with \(n = 30, \theta_1 = 0.45, \theta_2 = 0.60\)

## Construct for a moderate-sized action space
E_U <- construct_E_U(n = 30, theta1 = 0.45, theta2 = 0.60)
## Create a data frame representing nodes and decisions (continue vs stop)
data1 <- expand.grid(i = c(0:29),
                     j = c(0:29),
                     stop = c(0,1)) %>%
    subset(i + j <= 29) %>%
    arrange(i, j, stop)
## Calculate expected utilities
data1$E_U <- with(data1, E_U(i = i, j = j, stop = stop))
## Determine best decisions
data1$d_optim <- with(data1, argmax_d_E_U(i = i, j = j))
## Plot expected utilities under continue (0) or stop (1) at (i,j)
ggplot(data = data1, mapping = aes(x = i, y = j,
                                   color = E_U)) +
    geom_point(size = 5) +
    facet_grid(. ~ stop) +
    scale_color_gradient2(low = "blue", high = "red") +
    theme_bw() + theme(legend.key = element_blank())

## Indicate optimal decision at each (i,j)
ggplot(data = unique(data1[c("i","j","d_optim")]),
       mapping = aes(x = i, y = j, label = d_optim)) +
    geom_text() +
    theme_bw() + theme(legend.key = element_blank())