library(tidyverse)
library(testthat)
library(tictoc)
source("~/Documents/JMP/code/Rust_IRL_discretized.R")
## 17.569 sec elapsed

1: Optimal Stopping Point

Simulate data where the optimal stopping point is at mileage bucket 30.

data1 <- map(1:104, function(...) monte_carlo_sim(
  c(rep(0, 29), rep(1, 61))))

head(data1)
## [[1]]
##   [1]  1  2  3  4  5  6  7  7  7  8  8  9 10 10 10 12 13 14 15 16 16 16 17 17 17
##  [26] 17 19 20 20 20 20 20 20 20 20 21 22 23 23 24 25 26 27 28 29 29 30  2  2  2
##  [51]  3  4  4  5  5  5  6  7  8  8  8  9  9 10 11 11 11 11 11 12 13 14 15 16 17
##  [76] 18 18 19 20 21 21 22 23 24 25 26 27 27 28 29 29 29 30  2  2  3  4  4  5  5
## 
## [[2]]
##   [1]  1  2  3  3  3  3  4  4  4  5  5  6  7  7  8  9 10 11 11 12 13 14 15 15 16
##  [26] 16 17 17 17 18 18 19 19 20 21 21 21 22 23 23 23 23 25 26 27 28 29 31  1  2
##  [51]  3  3  4  4  5  5  5  5  5  6  6  6  7  7  8  9 10 10 11 12 12 13 14 15 15
##  [76] 16 17 18 19 20 21 22 23 24 26 27 28 29 30  2  3  4  5  6  7  8  8  9  9  9
## 
## [[3]]
##   [1]  1  2  2  3  3  4  5  6  7  7  7  8  9 10 10 10 11 12 12 13 13 14 14 14 15
##  [26] 16 16 17 18 18 19 20 20 20 21 22 23 24 25 25 26 27 28 29 30  1  2  3  4  5
##  [51]  6  7  7  7  7  8  9 10 11 11 12 13 14 15 15 16 17 18 19 20 21 21 22 22 23
##  [76] 24 25 26 26 26 26 27 28 29 30  2  3  4  5  6  6  7  8  8  9  9 10 10 10 11
## 
## [[4]]
##   [1]  1  2  3  4  5  6  7  7  8  9 10 11 11 12 12 12 12 13 13 14 15 15 16 17 17
##  [26] 18 19 19 20 21 21 22 23 23 23 24 25 25 27 28 28 29 29 30  1  3  4  5  6  7
##  [51]  8  9 10 11 11 12 12 13 13 14 15 15 16 17 18 19 20 21 21 22 24 24 25 25 26
##  [76] 27 27 27 28 29 30  1  2  2  2  3  4  4  5  5  6  7  8  8  8  8  9 10 11 12
## 
## [[5]]
##   [1]  1  1  1  1  1  2  2  2  2  2  3  4  4  5  6  6  6  7  8  9  9  9 10 11 11
##  [26] 12 13 14 14 14 15 15 16 16 17 17 19 19 19 20 20 21 22 23 24 25 25 25 26 27
##  [51] 28 29 29 29 29 29 30  2  2  3  4  5  5  6  7  8  8  8  8  9 10 11 11 12 12
##  [76] 12 13 14 14 15 16 16 17 18 19 19 20 21 22 22 22 23 24 24 25 26 26 27 27 27
## 
## [[6]]
##   [1]  1  1  2  3  4  4  4  4  4  5  5  5  6  6  6  6  7  8  8  9 10 10 10 10 11
##  [26] 11 12 12 13 14 14 15 15 16 16 16 17 18 19 20 21 21 22 22 23 24 24 24 25 25
##  [51] 26 26 27 28 29 30  2  3  4  5  6  7  8  8  9 10 11 11 12 13 14 15 15 15 16
##  [76] 16 17 18 19 20 21 22 22 22 22 23 23 23 24 25 26 26 27 28 29 29 29 30  2  2
set.seed(123)
tic()
results1 <- IRL(data1)
toc()
## 5.224 sec elapsed
# Policy: successfully detected the optimal stopping rule
results1[[1]][[1]]
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [77] 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Reward function

tibble(
  r = results1[[2]],
  x = 1:90
) %>%
  ggplot(aes(x = x, y = r)) +
  geom_point() +
  geom_vline(xintercept = 30)

# Value function
tibble(r = results1[[3]], x = 1:90) %>%
  ggplot(aes(x = x, y = r)) +
  geom_point() +
  geom_line()

2: Optimal Stopping Point with Noisy Trajectories

Simulate data where the optimal stopping point is at mileage bucket 30, but the agent has a shaky hand and sometimes stops too soon and sometimes stops too late.

set.seed(123)
stopping <- sample(28:32, size = 104, replace = T, prob = 220-100*abs(-2:2))

data2 <- map2(
  1:104, stopping, 
  function(x, y) monte_carlo_sim(c(rep(0, y - 1), rep(1, 91 - y)))
)

head(data2)
## [[1]]
##   [1]  1  2  3  4  4  5  6  6  6  7  8  8  9 10 10 11 12 13 14 14 15 16 17 17 18
##  [26] 18 18 18 19 19 19 19 19 20 21 22 23 24 24 25 26 27 28 28 28 29 30  2  3  4
##  [51]  5  6  7  8  9 10 10 11 12 13 13 14 15 16 17 18 18 18 18 19 20 21 21 22 22
##  [76] 23 23 24 25 26 27 28 28 28 29 30  1  2  2  3  4  4  5  6  7  7  8  9 10 10
## 
## [[2]]
##   [1]  1  2  3  4  5  6  7  7  8  8  9 10 10 10 11 11 11 11 12 13 14 15 15 15 16
##  [26] 17 18 19 20 21 22 22 23 23 24 24 25 25 26 26 27 27 27 27 28 29 30 31  2  2
##  [51]  3  4  5  5  5  5  5  5  6  7  8  9 10 11 11 12 13 14 14 14 14 15 16 16 16
##  [76] 17 18 19 20 21 22 23 24 24 25 26 27 27 27 27 27 28 29 29 29 30 30 30 31  2
## 
## [[3]]
##   [1]  1  2  3  4  5  6  6  7  8  8  8  9 10 10 10 11 12 12 13 14 16 17 18 18 19
##  [26] 20 20 20 21 22 22 22 22 22 23 24 25 26 27 28 28 29 30  2  3  3  4  5  6  6
##  [51]  6  7  8  8  9 10 10 11 12 12 13 14 15 16 17 18 18 19 20 20 20 21 22 22 23
##  [76] 23 24 24 25 25 26 27 28 29 29 30  1  1  2  3  4  5  6  8  8  9  9 10 11 12
## 
## [[4]]
##   [1]  1  2  3  3  4  4  5  6  7  8  8  9 10 11 12 12 13 14 14 15 16 17 18 18 18
##  [26] 19 20 22 23 24 25 26 27 28 29 30 30 31  1  1  1  2  3  3  4  5  6  7  8  8
##  [51]  8  8  9 10 10 11 12 13 14 15 16 16 17 17 18 18 19 19 19 20 21 22 23 23 24
##  [76] 24 25 25 25 26 27 27 28 28 28 29 30 30 31  3  4  5  5  5  6  7  8  9 10 11
## 
## [[5]]
##   [1]  1  2  2  3  4  5  5  6  6  7  8  8  9  9 10 11 12 13 14 14 14 15 17 18 18
##  [26] 18 19 19 20 20 21 21 22 23 23 24 24 25 25 25 26 26 26 27 28 29 30 30 31 32
##  [51]  1  2  3  4  5  5  5  6  6  6  7  7  8  8  9  9 10 11 11 11 12 13 14 15 15
##  [76] 15 15 16 16 17 18 18 18 19 20 21 21 22 23 23 24 25 25 26 27 27 28 29 30 30
## 
## [[6]]
##   [1]  1  1  2  3  4  5  6  8  9  9  9  9  9 10 10 10 10 11 12 13 14 14 15 16 16
##  [26] 16 17 17 18 19 19 19 19 20 21 21 21 22 23 24 24 25 26 27 27 27 28 29 29 29
##  [51] 29 30  2  3  3  3  4  4  5  6  6  7  7  8  9 10 11 12 13 14 14 15 16 17 17
##  [76] 17 17 17 17 18 19 20 21 22 23 23 24 25 26 27 28 29 30  2  2  3  4  4  5  6
set.seed(123)
tic()
results2 <- IRL(data2)
toc()
## 69.927 sec elapsed
# Policy: successfully detected the optimal stopping rule
results2[[1]][[1]]
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [77] 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Reward function
results2[[2]]
##  [1] -1 -1 -1 -1 -1 -1 -1 -1  1  1  1 -1 -1 -1 -1 -1 -1  1  1  1  1  1  1  1  1
## [26]  1  1  1  1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [51] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [76] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
# Value function has changed
tibble(r2 = results2[[3]], x = 1:90) %>%
  ggplot(aes(x = x, y = r2)) +
  geom_point() +
  geom_line()

3: Optimal Stopping Point with More Noise

set.seed(123)
stopping <- sample(27:33, size = 104, replace = T, prob = 320-100*abs(-3:3))

data3 <- map2(
  1:104, stopping, 
  function(x, y) monte_carlo_sim(c(rep(0, y - 1), rep(1, 91 - y)))
)

head(data3)
## [[1]]
##   [1]  1  2  3  4  4  5  6  6  6  7  8  8  9 10 10 11 12 13 14 14 15 16 17 17 18
##  [26] 18 18 18 19 19 19 19 19 20 21 22 23 24 24 25 26 27 28 28 28 29 30  2  3  4
##  [51]  5  6  7  8  9 10 10 11 12 13 13 14 15 16 17 18 18 18 18 19 20 21 21 22 22
##  [76] 23 23 24 25 26 27 28 28 28 29 30  1  2  2  3  4  4  5  6  7  7  8  9 10 10
## 
## [[2]]
##   [1]  1  2  3  4  5  6  7  7  8  8  9 10 10 10 11 11 11 11 12 13 14 15 15 15 16
##  [26] 17 18 19 20 21 22 22 23 23 24 24 25 25 26 26 27 27 27 27 28 29 30 31 32  1
##  [51]  2  3  4  4  4  4  4  4  5  6  7  8  9 10 10 11 12 13 13 13 13 14 15 15 15
##  [76] 16 17 18 19 20 21 22 23 23 24 25 26 26 26 26 26 27 28 28 28 29 29 29 30 31
## 
## [[3]]
##   [1]  1  2  3  4  5  6  6  7  8  8  8  9 10 10 10 11 12 12 13 14 16 17 18 18 19
##  [26] 20 20 20 21 22 22 22 22 22 23 24 25 26 27 28 28 29 30 31  2  2  3  4  5  5
##  [51]  5  6  7  7  8  9  9 10 11 11 12 13 14 15 16 17 17 18 19 19 19 20 21 21 22
##  [76] 22 23 23 24 24 25 26 27 28 28 29 29 29 30 31  2  3  4  6  6  7  7  8  9 10
## 
## [[4]]
##   [1]  1  2  3  3  4  4  5  6  7  8  8  9 10 11 12 12 13 14 14 15 16 17 18 18 18
##  [26] 19 20 22 23 24 25 26 27 28  2  3  3  4  4  4  4  5  6  6  7  8  9 10 11 11
##  [51] 11 11 12 13 13 14 15 16 17 18 19 19 20 20 21 21 22 22 22 23 24 25 26 26 27
##  [76] 27 28  1  1  2  3  3  4  4  4  5  6  6  7  9 10 11 11 11 12 13 14 15 16 17
## 
## [[5]]
##   [1]  1  2  2  3  4  5  5  6  6  7  8  8  9  9 10 11 12 13 14 14 14 15 17 18 18
##  [26] 18 19 19 20 20 21 21 22 23 23 24 24 25 25 25 26 26 26 27 28  2  3  3  4  5
##  [51]  5  6  7  8  9  9  9 10 10 10 11 11 12 12 13 13 14 15 15 15 16 17 18 19 19
##  [76] 19 19 20 20 21 22 22 22 23 24 25 25 26 27 27 28  2  2  3  4  4  5  6  7  7
## 
## [[6]]
##   [1]  1  1  2  3  4  5  6  8  9  9  9  9  9 10 10 10 10 11 12 13 14 14 15 16 16
##  [26] 16 17 17 18 19 19 19 19 20 21 21 21 22 23 24 24 25 26 27 27 27 28 29 29 29
##  [51] 29 30  2  3  3  3  4  4  5  6  6  7  7  8  9 10 11 12 13 14 14 15 16 17 17
##  [76] 17 17 17 17 18 19 20 21 22 23 23 24 25 26 27 28 29 30  2  2  3  4  4  5  6
# results3 <- IRL(data3)
# This returns null.