library(tidyverse)
library(testthat)
library(tictoc)
source("~/Documents/JMP/code/Rust_IRL_discretized.R")
## 17.569 sec elapsed
Simulate data where the optimal stopping point is at mileage bucket 30.
data1 <- map(1:104, function(...) monte_carlo_sim(
c(rep(0, 29), rep(1, 61))))
head(data1)
## [[1]]
## [1] 1 2 3 4 5 6 7 7 7 8 8 9 10 10 10 12 13 14 15 16 16 16 17 17 17
## [26] 17 19 20 20 20 20 20 20 20 20 21 22 23 23 24 25 26 27 28 29 29 30 2 2 2
## [51] 3 4 4 5 5 5 6 7 8 8 8 9 9 10 11 11 11 11 11 12 13 14 15 16 17
## [76] 18 18 19 20 21 21 22 23 24 25 26 27 27 28 29 29 29 30 2 2 3 4 4 5 5
##
## [[2]]
## [1] 1 2 3 3 3 3 4 4 4 5 5 6 7 7 8 9 10 11 11 12 13 14 15 15 16
## [26] 16 17 17 17 18 18 19 19 20 21 21 21 22 23 23 23 23 25 26 27 28 29 31 1 2
## [51] 3 3 4 4 5 5 5 5 5 6 6 6 7 7 8 9 10 10 11 12 12 13 14 15 15
## [76] 16 17 18 19 20 21 22 23 24 26 27 28 29 30 2 3 4 5 6 7 8 8 9 9 9
##
## [[3]]
## [1] 1 2 2 3 3 4 5 6 7 7 7 8 9 10 10 10 11 12 12 13 13 14 14 14 15
## [26] 16 16 17 18 18 19 20 20 20 21 22 23 24 25 25 26 27 28 29 30 1 2 3 4 5
## [51] 6 7 7 7 7 8 9 10 11 11 12 13 14 15 15 16 17 18 19 20 21 21 22 22 23
## [76] 24 25 26 26 26 26 27 28 29 30 2 3 4 5 6 6 7 8 8 9 9 10 10 10 11
##
## [[4]]
## [1] 1 2 3 4 5 6 7 7 8 9 10 11 11 12 12 12 12 13 13 14 15 15 16 17 17
## [26] 18 19 19 20 21 21 22 23 23 23 24 25 25 27 28 28 29 29 30 1 3 4 5 6 7
## [51] 8 9 10 11 11 12 12 13 13 14 15 15 16 17 18 19 20 21 21 22 24 24 25 25 26
## [76] 27 27 27 28 29 30 1 2 2 2 3 4 4 5 5 6 7 8 8 8 8 9 10 11 12
##
## [[5]]
## [1] 1 1 1 1 1 2 2 2 2 2 3 4 4 5 6 6 6 7 8 9 9 9 10 11 11
## [26] 12 13 14 14 14 15 15 16 16 17 17 19 19 19 20 20 21 22 23 24 25 25 25 26 27
## [51] 28 29 29 29 29 29 30 2 2 3 4 5 5 6 7 8 8 8 8 9 10 11 11 12 12
## [76] 12 13 14 14 15 16 16 17 18 19 19 20 21 22 22 22 23 24 24 25 26 26 27 27 27
##
## [[6]]
## [1] 1 1 2 3 4 4 4 4 4 5 5 5 6 6 6 6 7 8 8 9 10 10 10 10 11
## [26] 11 12 12 13 14 14 15 15 16 16 16 17 18 19 20 21 21 22 22 23 24 24 24 25 25
## [51] 26 26 27 28 29 30 2 3 4 5 6 7 8 8 9 10 11 11 12 13 14 15 15 15 16
## [76] 16 17 18 19 20 21 22 22 22 22 23 23 23 24 25 26 26 27 28 29 29 29 30 2 2
set.seed(123)
tic()
results1 <- IRL(data1)
toc()
## 5.224 sec elapsed
# Policy: successfully detected the optimal stopping rule
results1[[1]][[1]]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [77] 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Reward function
tibble(
r = results1[[2]],
x = 1:90
) %>%
ggplot(aes(x = x, y = r)) +
geom_point() +
geom_vline(xintercept = 30)
# Value function
tibble(r = results1[[3]], x = 1:90) %>%
ggplot(aes(x = x, y = r)) +
geom_point() +
geom_line()
Simulate data where the optimal stopping point is at mileage bucket 30, but the agent has a shaky hand and sometimes stops too soon and sometimes stops too late.
set.seed(123)
stopping <- sample(28:32, size = 104, replace = T, prob = 220-100*abs(-2:2))
data2 <- map2(
1:104, stopping,
function(x, y) monte_carlo_sim(c(rep(0, y - 1), rep(1, 91 - y)))
)
head(data2)
## [[1]]
## [1] 1 2 3 4 4 5 6 6 6 7 8 8 9 10 10 11 12 13 14 14 15 16 17 17 18
## [26] 18 18 18 19 19 19 19 19 20 21 22 23 24 24 25 26 27 28 28 28 29 30 2 3 4
## [51] 5 6 7 8 9 10 10 11 12 13 13 14 15 16 17 18 18 18 18 19 20 21 21 22 22
## [76] 23 23 24 25 26 27 28 28 28 29 30 1 2 2 3 4 4 5 6 7 7 8 9 10 10
##
## [[2]]
## [1] 1 2 3 4 5 6 7 7 8 8 9 10 10 10 11 11 11 11 12 13 14 15 15 15 16
## [26] 17 18 19 20 21 22 22 23 23 24 24 25 25 26 26 27 27 27 27 28 29 30 31 2 2
## [51] 3 4 5 5 5 5 5 5 6 7 8 9 10 11 11 12 13 14 14 14 14 15 16 16 16
## [76] 17 18 19 20 21 22 23 24 24 25 26 27 27 27 27 27 28 29 29 29 30 30 30 31 2
##
## [[3]]
## [1] 1 2 3 4 5 6 6 7 8 8 8 9 10 10 10 11 12 12 13 14 16 17 18 18 19
## [26] 20 20 20 21 22 22 22 22 22 23 24 25 26 27 28 28 29 30 2 3 3 4 5 6 6
## [51] 6 7 8 8 9 10 10 11 12 12 13 14 15 16 17 18 18 19 20 20 20 21 22 22 23
## [76] 23 24 24 25 25 26 27 28 29 29 30 1 1 2 3 4 5 6 8 8 9 9 10 11 12
##
## [[4]]
## [1] 1 2 3 3 4 4 5 6 7 8 8 9 10 11 12 12 13 14 14 15 16 17 18 18 18
## [26] 19 20 22 23 24 25 26 27 28 29 30 30 31 1 1 1 2 3 3 4 5 6 7 8 8
## [51] 8 8 9 10 10 11 12 13 14 15 16 16 17 17 18 18 19 19 19 20 21 22 23 23 24
## [76] 24 25 25 25 26 27 27 28 28 28 29 30 30 31 3 4 5 5 5 6 7 8 9 10 11
##
## [[5]]
## [1] 1 2 2 3 4 5 5 6 6 7 8 8 9 9 10 11 12 13 14 14 14 15 17 18 18
## [26] 18 19 19 20 20 21 21 22 23 23 24 24 25 25 25 26 26 26 27 28 29 30 30 31 32
## [51] 1 2 3 4 5 5 5 6 6 6 7 7 8 8 9 9 10 11 11 11 12 13 14 15 15
## [76] 15 15 16 16 17 18 18 18 19 20 21 21 22 23 23 24 25 25 26 27 27 28 29 30 30
##
## [[6]]
## [1] 1 1 2 3 4 5 6 8 9 9 9 9 9 10 10 10 10 11 12 13 14 14 15 16 16
## [26] 16 17 17 18 19 19 19 19 20 21 21 21 22 23 24 24 25 26 27 27 27 28 29 29 29
## [51] 29 30 2 3 3 3 4 4 5 6 6 7 7 8 9 10 11 12 13 14 14 15 16 17 17
## [76] 17 17 17 17 18 19 20 21 22 23 23 24 25 26 27 28 29 30 2 2 3 4 4 5 6
set.seed(123)
tic()
results2 <- IRL(data2)
toc()
## 69.927 sec elapsed
# Policy: successfully detected the optimal stopping rule
results2[[1]][[1]]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [77] 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Reward function
results2[[2]]
## [1] -1 -1 -1 -1 -1 -1 -1 -1 1 1 1 -1 -1 -1 -1 -1 -1 1 1 1 1 1 1 1 1
## [26] 1 1 1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [51] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [76] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
# Value function has changed
tibble(r2 = results2[[3]], x = 1:90) %>%
ggplot(aes(x = x, y = r2)) +
geom_point() +
geom_line()
set.seed(123)
stopping <- sample(27:33, size = 104, replace = T, prob = 320-100*abs(-3:3))
data3 <- map2(
1:104, stopping,
function(x, y) monte_carlo_sim(c(rep(0, y - 1), rep(1, 91 - y)))
)
head(data3)
## [[1]]
## [1] 1 2 3 4 4 5 6 6 6 7 8 8 9 10 10 11 12 13 14 14 15 16 17 17 18
## [26] 18 18 18 19 19 19 19 19 20 21 22 23 24 24 25 26 27 28 28 28 29 30 2 3 4
## [51] 5 6 7 8 9 10 10 11 12 13 13 14 15 16 17 18 18 18 18 19 20 21 21 22 22
## [76] 23 23 24 25 26 27 28 28 28 29 30 1 2 2 3 4 4 5 6 7 7 8 9 10 10
##
## [[2]]
## [1] 1 2 3 4 5 6 7 7 8 8 9 10 10 10 11 11 11 11 12 13 14 15 15 15 16
## [26] 17 18 19 20 21 22 22 23 23 24 24 25 25 26 26 27 27 27 27 28 29 30 31 32 1
## [51] 2 3 4 4 4 4 4 4 5 6 7 8 9 10 10 11 12 13 13 13 13 14 15 15 15
## [76] 16 17 18 19 20 21 22 23 23 24 25 26 26 26 26 26 27 28 28 28 29 29 29 30 31
##
## [[3]]
## [1] 1 2 3 4 5 6 6 7 8 8 8 9 10 10 10 11 12 12 13 14 16 17 18 18 19
## [26] 20 20 20 21 22 22 22 22 22 23 24 25 26 27 28 28 29 30 31 2 2 3 4 5 5
## [51] 5 6 7 7 8 9 9 10 11 11 12 13 14 15 16 17 17 18 19 19 19 20 21 21 22
## [76] 22 23 23 24 24 25 26 27 28 28 29 29 29 30 31 2 3 4 6 6 7 7 8 9 10
##
## [[4]]
## [1] 1 2 3 3 4 4 5 6 7 8 8 9 10 11 12 12 13 14 14 15 16 17 18 18 18
## [26] 19 20 22 23 24 25 26 27 28 2 3 3 4 4 4 4 5 6 6 7 8 9 10 11 11
## [51] 11 11 12 13 13 14 15 16 17 18 19 19 20 20 21 21 22 22 22 23 24 25 26 26 27
## [76] 27 28 1 1 2 3 3 4 4 4 5 6 6 7 9 10 11 11 11 12 13 14 15 16 17
##
## [[5]]
## [1] 1 2 2 3 4 5 5 6 6 7 8 8 9 9 10 11 12 13 14 14 14 15 17 18 18
## [26] 18 19 19 20 20 21 21 22 23 23 24 24 25 25 25 26 26 26 27 28 2 3 3 4 5
## [51] 5 6 7 8 9 9 9 10 10 10 11 11 12 12 13 13 14 15 15 15 16 17 18 19 19
## [76] 19 19 20 20 21 22 22 22 23 24 25 25 26 27 27 28 2 2 3 4 4 5 6 7 7
##
## [[6]]
## [1] 1 1 2 3 4 5 6 8 9 9 9 9 9 10 10 10 10 11 12 13 14 14 15 16 16
## [26] 16 17 17 18 19 19 19 19 20 21 21 21 22 23 24 24 25 26 27 27 27 28 29 29 29
## [51] 29 30 2 3 3 3 4 4 5 6 6 7 7 8 9 10 11 12 13 14 14 15 16 17 17
## [76] 17 17 17 17 18 19 20 21 22 23 23 24 25 26 27 28 29 30 2 2 3 4 4 5 6
# results3 <- IRL(data3)
# This returns null.