library(fable)
library(ggplot2)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.1 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.21.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
##
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
##
## extract
library(tidybayes)
library(loo)
## This is loo version 2.4.1
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
##
## Attaching package: 'loo'
## The following object is masked from 'package:rstan':
##
## loo
options(mc.cores = 4)
Set a budget of 5000. Allocate across three channels. Allocations vary in intervals of 10. Here is an (extremely inefficient yet clean) way to generate all allocations.
budget <- 5000
allocations = tibble(expand.grid(
X1=seq(0, budget, by=10),
X2=seq(0, budget, by=10),
X3=seq(0, budget, by=10)
)) %>%
mutate(s = X1 + X2 + X3) %>%
filter(s == budget) %>%
select(-s)
## Print random 10
sample_n(allocations, 10)
## # A tibble: 10 × 3
## X1 X2 X3
## <dbl> <dbl> <dbl>
## 1 430 3120 1450
## 2 4720 80 200
## 3 320 1990 2690
## 4 1140 2590 1270
## 5 970 3650 380
## 6 2230 1810 960
## 7 3990 660 350
## 8 2800 110 2090
## 9 1090 3050 860
## 10 1530 3340 130
The dynamic model:
\[ y_0 \rightarrow y_1 ; X_1 \\ y_0 \rightarrow y_1 ; X_2 \\ y_0 \rightarrow y_1 ; X_3 \\ y_0 \rightarrow y_2 ; X_2 \\ y_0 \rightarrow y_2 ; X_3 \\ y_{c,0} \rightarrow y_c ; Y_1 \\ y_{c,0} \rightarrow y_c ; Y_2 \\ \]
The deterministic equilibrium model. \(T=10000\). \[ Y_1 = T * (f(\omega_{11} * X_1 + \omega_{12} * X_2 + \omega_{13} * X_3) \\ Y_2 = T * f(\omega_{22} * X_2 + \omega_{23} * X_3)\\ Y = T * f(\omega_{1} * Y_1 + \omega_{2} * Y_2)\\ \]
The system has the following constraints. \(Y_0 + Y_1 + Y_2 = T\) and \(Y_{c,0} + Y_{c} = T\). I chose this model because it is like a multilayer perceptron. The casual DAG looks like the following, with \(Y_1\) and \(Y_2\).
bnlearn::graphviz.plot(bnlearn::model2network("[X1][X2][X3][Y1|X1:X2:X3][Y2|X2:X3][Y|Y1:Y2]"))
## Loading required namespace: Rgraphviz
Where T is the total number of people in the system. But I think I may have made some errors in math.
The probabilistic version is derived by solving Kolmogorov equations.
\[ Y_1 \sim \text{Binomial} (T, f(\omega_{11} * X_1 + \omega_{12} * X_2 + \omega_{13} * X_3) ) \\ Y_2 \sim \text{Binomial} (T, f(\omega_{22} * X_2 + \omega_{23} * X_3) ) \\ Y \sim \text{Binomial} (T, f(\omega_{1} * Y_1 + \omega_{2} * Y_2)) \\ \]
The deterministic values are the expected values of the probabilistic model.
As an SCM, let the exogenous variables be a vectors of T uniforms. Let g be a function that counts the number of uniforms that are less than \(f(...)\)
\[ Y_1 = g(f(\omega_{11} * X_1 + \omega_{12} * X_2 + \omega_{13} * X_3), T, N_{Y_1}) \\ Y_2 = g(f(\omega_{22} * X_2 + \omega_{23} * X_3), T, N_{Y_2}) \\ Y = g(f(\omega_{1} * Y_1 + \omega_{2} * Y_2), T, N_{Y}) \]
The function \(f\) can have different degrees of polynomials. I choose the first for simplicity.
f <- function(u){
(u)/(u + 1)
}
f2 <- function(u){
(u^2)/(u^2 + u + 1)
}
f3 <- function(u){
(u^3)/(u^3 + u^2 + u + 1)
}
I use the following parametrizations.
tot <- 10000
w11 <- 0.0003
w12 <- 0.0010
w13 <- 0.0003
w21 <- 0
w22 <- 0.0002
w23 <- 0.0015
w1 <- 0.00006
w2 <- 0.00002
Compute the expected values for the allocations.
allocations <- allocations %>%
mutate( # Expectations
EY1 = tot * f(w11 * X1 + w12 * X2 + w13 * X3),
EY2 = tot * f(w22 * X2 + w23 * X3),
EY = tot * f(w1 * EY1 + w2 * EY2)
)
# Print random 10
sample_n(allocations, 10)
## # A tibble: 10 × 6
## X1 X2 X3 EY1 EY2 EY
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1340 1530 2130 7200. 7778. 3701.
## 2 1060 1090 2850 6935. 8180. 3670.
## 3 1480 270 3250 6281. 8313. 3520.
## 4 2070 2620 310 7693. 4972. 3594.
## 5 3030 730 1240 6679. 6673. 3482.
## 6 40 2470 2490 7635. 8088. 3827.
## 7 1280 590 3130 6567. 8280. 3588.
## 8 630 2910 1460 7796. 7349. 3807.
## 9 0 1840 3160 7360. 8363. 3784.
## 10 690 3000 1310 7826. 7195. 3802.
Create a string to represent allocations.
allocations <- mutate(allocations, allocation = paste(X1, X2, X3, sep="-"))
sample_n(allocations, 10) %>% select(allocation)
## # A tibble: 10 × 1
## allocation
## <chr>
## 1 2730-230-2040
## 2 2910-2030-60
## 3 2430-2280-290
## 4 290-1220-3490
## 5 3520-200-1280
## 6 980-110-3910
## 7 2850-1720-430
## 8 4000-850-150
## 9 1340-970-2690
## 10 770-1390-2840
Get the allocation with the highest expected outcome.
maxEY = max(allocations$EY)
print(paste0("Max Y: ", round(maxEY, 2)))
## [1] "Max Y: 3858.11"
optimal_allocs = filter(allocations, EY == maxEY)$allocation
if(length(optimal_allocs) >1) print("more than one max")
optimal_alloc <- as.character(optimal_allocs[1])
optimal_alloc
## [1] "0-3530-1470"
Visualize the ground truth expected allocation curve.
core_plot_data <- allocations %>%
filter(((X1 %% 500 == 0) & (X2 %% 500 == 0) & (X3 %% 500 == 0) | (allocation == optimal_alloc))) %>%
mutate(allocation = factor(allocation, levels=allocation))
core_plot <- ggplot(core_plot_data, aes(x=allocation, y=EY)) +
geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust=1)) +
ylim(2250, 4200) +
geom_vline(xintercept=optimal_alloc) +
annotate(geom = "vline",
x = optimal_alloc,
xintercept = optimal_alloc,
linetype = c("solid")) +
annotate(geom = "text",
label = optimal_alloc,
x = optimal_alloc,
y = 3800,
angle = 90,
vjust = 1.5) +
ggtitle("Ground truth allocation vs expected conversions")
## Warning: Ignoring unknown aesthetics: x
print(core_plot)
Simulate training data of size n = 1000
sim_y <- Vectorize(function(n_vec, p){
# A function that simulates a binomial(m, p) from a vector of m uniforms
return(sum(map_int(n_vec, ~ .x < p)))
})
# Size of training data
n <- 1000
training <- sample_n(allocations, n, replace=TRUE) %>% # Sample n
mutate(
Ny1 = map(1:n(), ~ runif(tot)), # Exogenous
Ny2 = map(1:n(), ~ runif(tot)), # Exogenous
Ny = map(1:n(), ~ runif(tot)), # Exogenous
Y1 = sim_y(Ny1, f(w11 * X1 + w12 * X2 + w13 * X3)),
Y2 = sim_y(Ny2, f(w22 * X2 + w23 * X3)),
Y = sim_y(Ny, f(w1 * Y1 + w2 * Y2))
)
head(training)
## # A tibble: 6 × 13
## X1 X2 X3 EY1 EY2 EY allocation Ny1 Ny2 Ny Y1 Y2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <list> <lis> <lis> <int> <int>
## 1 370 2180 2450 7516. 8043. 3796. 370-2180-2… <dbl … <dbl… <dbl… 7548 7972
## 2 570 720 3710 6671. 8509. 3632. 570-720-37… <dbl … <dbl… <dbl… 6678 8505
## 3 900 3720 380 8041. 5678. 3734. 900-3720-3… <dbl … <dbl… <dbl… 8026 5673
## 4 1010 3710 280 8038. 5375. 3710. 1010-3710-… <dbl … <dbl… <dbl… 8153 5405
## 5 3460 420 1120 6421. 6382. 3390. 3460-420-1… <dbl … <dbl… <dbl… 6461 6306
## 6 2740 1000 1260 6875 6764. 3539. 2740-1000-… <dbl … <dbl… <dbl… 6839 6746
## # … with 1 more variable: Y <int>
Format the data for Stan.
formatted_data <- training %>% compose_data()
formatted_data$X1_alloc <- allocations$X1
formatted_data$X2_alloc <- allocations$X2
formatted_data$X3_alloc <- allocations$X3
formatted_data$m <- nrow(allocations)
print(class(formatted_data))
## [1] "list"
print(names(formatted_data))
## [1] "X1" "X2" "X3" "EY1" "EY2"
## [6] "EY" "allocation" "n_allocation" "Ny1" "Ny2"
## [11] "Ny" "Y1" "Y2" "Y" "n"
## [16] "X1_alloc" "X2_alloc" "X3_alloc" "m"
Write and fit the proposed model. Using Poisson instead of Binomial for sake of using HMC in Stan. If you put a Poisson(lambda) prior on the number of trials in a binomial with parameter p, it simplifies to a Poisson(lambda * p). Note that the “generated quantites” block is where all the magic happens. In this block you specify operations on the parameters. In my case, I iterate through each of the allocations and record the allocation that has the highest value of Y (conversion count), and that value of Y itself. Then, for each sample from the posterior, I’ll get a tuple of the optimal allocation/Y value. I plot these below.
In the generated quantites block, I also store the log probability of each data point given a parameter vector sampled from the posterior. Averaging these values gives expected pointwise posterior density, which is used for predictive fit quantification and model comparison.
ground_truth_code <- "
functions {
real f(real u) {
return (u) / (u + 1);
}
}
data {
int n; // number of observations
int m;
vector[n] X1;
vector[n] X2;
vector[n] X3;
int Y1[n];
int Y2[n];
int Y[n];
vector[m] X1_alloc;
vector[m] X2_alloc;
vector[m] X3_alloc;
}
parameters {
real<lower=0, upper=0.01> w11;
real<lower=0, upper=0.01> w12;
real<lower=0, upper=0.01> w13;
real<lower=0, upper=0.01> w22;
real<lower=0, upper=0.01> w23;
real<lower=0, upper=0.0001> w1;
real<lower=0, upper=0.0001> w2;
real<lower=9000> tot;
}
transformed parameters {
vector[n] Y1_mu;
vector[n] Y2_mu;
vector[n] Y_mu;
for(i in 1:n){
Y1_mu[i] = tot * f(w11 * X1[i] + w12 * X2[i] + w13 * X3[i]);
Y2_mu[i] = tot * f(w22 * X2[i] + w23 * X3[i]);
Y_mu[i] = tot * f(w1 * Y1_mu[i] + w2 * Y2_mu[i]);
}
}
model {
// Half normal starting at highest Y value
tot ~ normal(9000, 1200);
for(i in 1:n){
Y1[i] ~ poisson(Y1_mu[i]);
Y2[i] ~ poisson(Y2_mu[i]);
Y[i] ~ poisson(Y_mu[i]);
}
}
generated quantities {
vector[n] log_lik;
vector[n] y_pred;
real X1_max;
real X2_max;
real X3_max;
real Y1_;
real Y2_;
real eta_max;
real y_alloc_pred;
for(i in 1:n){
y_pred[i] = poisson_rng(Y_mu[i]);
log_lik[i] = poisson_lpmf(Y[i] | Y_mu[i]);
}
eta_max = 0;
for(j in 1:m){
Y1_ = tot * f(w11 * X1_alloc[j] + w12 * X2_alloc[j] + w13 * X3_alloc[j]);
Y2_ = tot * f(w22 * X2_alloc[j] + w23 * X3_alloc[j]);
y_alloc_pred = tot * f(w1 * Y1_ + w2 * Y2_);
if(y_alloc_pred > eta_max){
eta_max = y_alloc_pred;
X1_max = X1_alloc[j];
X2_max = X2_alloc[j];
X3_max = X3_alloc[j];
}
}
}
"
n_iter <- 2000
ground_truth_model <- stan_model(model_code = ground_truth_code)
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DBOOST_NO_AUTO_PTR -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/usr/local/include -fPIC -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
ground_truth_fit <- sampling(ground_truth_model, formatted_data, iter=n_iter, seed=123)
Next I draw traceplots. These are not meant for the paper, they are just meant to check the MCMC converged and mixed. Since this is the ground truth model, this traces should converge to the ground truth values. That is not true for the baseline model.
traceplot(ground_truth_fit, c("w11", "w12", "w13", "w22", "w23", "w1", "w2", "tot"))
I also check predictive fit. Again, these are not meant for the paper, though they could be added if it’s useful for the message. In the case of the ground truth proposed model, this is a sanity check.
y_pred_ground_truth <- rstan::extract(ground_truth_fit, "y_pred")$y_pred
plot(training$X1, training$Y, pch=20, xlab="X1", ylab="Y", ylim=c(2500, 4500))
for(i in 1:n_iter){
points(training$X1, y_pred_ground_truth[i, ], pch=20, col=rgb(139/255, 0, 0, 0.01))
}
plot(training$X2, training$Y, pch=20, xlab="X2", ylab="Y", ylim=c(2500, 4500))
for(i in 1:n_iter){
points(training$X2, y_pred_ground_truth[i, ], pch=20, col=rgb(139/255, 0, 0, 0.01))
}
plot(training$X3, training$Y, pch=20, xlab="X3", ylab="Y", ylim=c(2500, 4500))
for(i in 1:n_iter){
points(training$X3, y_pred_ground_truth[i, ], pch=20, col=rgb(139/255, 0, 0, 0.01))
}
The following generates the results finding an optimal allocation simply by optimizating ATE.
initial_proposed_plot <- as_tibble(rstan::extract(ground_truth_fit, c("X1_max", "X2_max", "X3_max", "eta_max"))) %>%
rename(
Y=eta_max,
X1 = X1_max,
X2 = X2_max,
X3 = X3_max
) %>%
mutate(allocation = paste(X1, X2, X3, sep="-")) %>%
select(allocation, Y) %>%
mutate(allocation = factor(allocation, levels=allocations$allocation)) %>%
mutate(
model = "proposed",
) %>%
ggplot(aes(x=allocation, y=Y, colour="red")) +
geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust=1)) +
geom_vline(xintercept=optimal_alloc) +
annotate(geom = "vline",
x = optimal_alloc,
xintercept = optimal_alloc,
linetype = c("solid")) +
annotate(geom = "text",
label = optimal_alloc,
x = optimal_alloc,
y = 3855,
angle = 90,
vjust = 2) +
ggtitle("Proposed model ATE optimization")
## Warning: Ignoring unknown aesthetics: x
print(initial_proposed_plot)
The vertical line is the same ground truth vertical line from the groundtruth plot.
Now I fit the baseline model.
## Fit both models on the proposed model
baseline_model_code <- "
functions {
real hill_function(real x, real rho, real kappa) {
return (x^rho) / (kappa^rho + x^rho);
}
}
data {
int n; // number of observations
int m; // length of allocation vector
vector[n] X1;
vector[n] X2;
vector[n] X3;
vector[n] Y;
vector[m] X1_alloc;
vector[m] X2_alloc;
vector[m] X3_alloc;
}
parameters {
real<lower=0> tau;
real<lower=0> beta1;
real<lower=0> beta2;
real<lower=0> beta3;
real<lower=0> rho1;
real<lower=0> rho2;
real<lower=0> rho3;
real<lower=0> kappa1;
real<lower=0> kappa2;
real<lower=0> kappa3;
real<lower=10> scale;
}
transformed parameters {
vector[n] eta;
for(i in 1:n){
eta[i] = tau + beta1 * hill_function(X1[i], rho1, kappa1) + beta2 * hill_function(X2[i], rho2, kappa2) + beta3 * hill_function(X3[i], rho3, kappa3);
}
}
model {
// Parameter priors
tau ~ normal(0, 50);
beta1 ~ normal(0, 3000);
rho1 ~ gamma(2, 1);
kappa1 ~ normal(0, 200);
beta2 ~ normal(0, 3000);
rho2 ~ gamma(2, 1);
kappa2 ~ normal(0, 200);
beta3 ~ normal(0, 3000);
rho3 ~ gamma(2, 1);
kappa3 ~ normal(0, 200);
scale ~ cauchy(0, 100);
// Exogenous
// The likelihood
for(i in 1:n){
Y[i] ~ normal(eta[i], scale);
}
}
generated quantities {
vector[n] log_lik;
vector[n] y_pred;
real eta_alloc;
real eta_max;
real X1_max;
real X2_max;
real X3_max;
eta_max = 0;
for(i in 1:n){
y_pred[i] = normal_rng(eta[i], scale);
log_lik[i] = normal_lpdf(Y[i] | eta[i], scale);
}
for(j in 1:m){
eta_alloc = tau + beta1 * hill_function(X1_alloc[j], rho1, kappa1) + beta2 * hill_function(X2_alloc[j], rho2, kappa2) + beta3 * hill_function(X3_alloc[j], rho3, kappa3);
if(eta_alloc > eta_max){
eta_max = eta_alloc;
X1_max = X1_alloc[j];
X2_max = X2_alloc[j];
X3_max = X3_alloc[j];
}
}
}
"
baseline_model <- stan_model(model_code = baseline_model_code)
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DBOOST_NO_AUTO_PTR -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/usr/local/include -fPIC -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
baseline_fit <- sampling(baseline_model, formatted_data, iter=n_iter, seed=124)
Next, I check the MCMC trace plots. Note that the baseline model is wrong, so I don’t expect this model’s parameter estimates to say anything about ground truth parameter values.
Currently I’m not getting great mixing for the baseline model. I’m iterating through random seeds until I find a good mix. The better thing to do would be to tweak the priors or increase sample size, but that is a good use of time right now.
traceplot(baseline_fit, c("tau", "beta1", "rho1", "kappa1", "beta2", "rho2", "kappa2", "beta3", "rho3", "kappa3"))
Next I make sure there is good posterior predictive fit.
### Check how well the baseline model fits the ground truth
y_pred_baseline <- rstan::extract(baseline_fit, "y_pred")$y_pred
plot(training$X1, training$Y, pch=20, xlab="X1", ylab="Y", ylim=c(2500, 4500))
for(i in 1:n_iter){
points(training$X1, y_pred_baseline[i, ], pch=20, col=rgb(139/255, 0, 0, 0.01))
}
plot(training$X2, training$Y, pch=20, xlab="X2", ylab="Y", ylim=c(2500, 4500))
for(i in 1:n_iter){
points(training$X2, y_pred_baseline[i, ], pch=20, col=rgb(139/255, 0, 0, 0.01))
}
plot(training$X3, training$Y, pch=20, xlab="X3", ylab="Y", ylim=c(2500, 4500))
for(i in 1:n_iter){
points(training$X3, y_pred_baseline[i, ], pch=20, col=rgb(139/255, 0, 0, 0.01))
}
These predictive fit plots are more important in this case of the baseline model. A core argument of the paper is that the baseline model is flexible enough to fit observational data and estimate ATE’s and thus optimize based on ATE, but it can’t handle multiverse counterfactual-based optimization. Thus, the baseline model should be flexible enough to fit the data well.
Right now the baseline model is fitting way better, but I’ll put the models on more equal footing once I get the counterfactual optimization in place.
### Get Loo
log_lik_baseline_fit <- extract_log_lik(baseline_fit, merge_chains = FALSE)
r_eff <- relative_eff(exp(log_lik_baseline_fit), cores = 4)
loo_baseline_fit <- loo(log_lik_baseline_fit, r_eff = r_eff, cores = 4)
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
log_lik_ground_truth_fit <- extract_log_lik(ground_truth_fit, merge_chains = FALSE)
r_eff <- relative_eff(exp(log_lik_ground_truth_fit), cores = 4)
loo_ground_truth_fit <- loo(log_lik_ground_truth_fit, r_eff = r_eff, cores = 4)
loo_compare(loo_baseline_fit, loo_ground_truth_fit)
## elpd_diff se_diff
## model2 0.0 0.0
## model1 -866.0 194.4
The following is the target plot.
plot_data0 <- as_tibble(rstan::extract(baseline_fit, c("X1_max", "X2_max", "X3_max", "eta_max"))) %>%
rename(
Y=eta_max,
X1 = X1_max,
X2 = X2_max,
X3 = X3_max
) %>%
mutate(model = "baseline")
plot_data1 <- as_tibble(rstan::extract(ground_truth_fit, c("X1_max", "X2_max", "X3_max", "eta_max"))) %>%
rename(
Y=eta_max,
X1 = X1_max,
X2 = X2_max,
X3 = X3_max
) %>%
mutate(model = "proposed ATE only")
plot_data <- rbind(plot_data0, plot_data1) %>%
mutate(allocation = paste(X1, X2, X3, sep="-")) %>%
mutate(allocation = factor(allocation, levels=allocations$allocation)) %>%
mutate(model = as.factor(model))
alloc_levels <- levels(plot_data$allocation)
num_levels <- length(alloc_levels)
pfinal <- ggplot(plot_data, aes(x=allocation, y=Y)) +
geom_point(aes(colour=model)) +
scale_x_discrete(limits=alloc_levels, breaks=alloc_levels[seq(1, num_levels, by=500)]) +
theme(axis.text.x = element_text(angle = 45, hjust=1, size=6)) +
geom_vline(xintercept=optimal_alloc) +
ylim(3750, 4000) +
annotate(geom = "vline",
x = optimal_alloc,
xintercept = optimal_alloc,
linetype = c("solid")) +
annotate(geom = "text",
label = optimal_alloc,
x = optimal_alloc,
y = 3855,
angle = 90,
vjust = 2)
## Warning: Ignoring unknown aesthetics: x
print(pfinal)
I’m still tweaking. The x-axis allocation, the Y axis is expected return. This is a posterior predictive plot, i.e. each point is an optimal allocation and corresponding expected outcome conditional on a sample from the posterior distribution of the parameters. Currently I’m showing results only for the ATE-based optimization from the two models. The blue is the proposed model, the red is the baseline model. What is missing is an addition “model” color for “proposed multiverse optimization” or something. I’m working on that now.
The vertical line is the same from “ground truth” plot, which is the true optimal allocation. I’d like to overlay the ground truth curve on this plot but it’s turning out to be pretty tricky. I’ll switch back to that when I’ve added the counterfactual bit. I’ll also trim allocations on the right and left of the x axis in the image.