library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1     ✔ purrr   1.0.1
## ✔ tibble  3.2.1     ✔ dplyr   1.1.4
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(here)
## here() starts at /Users/caoanjie/Desktop/projects/RANCH/RANCH_model/diagnostics

Simulation specs

This is a simulation comparing the scaled vs. unscaled embeddings. The two simulations share all the same parameters except for the values of the embeddings. The parameters are as follows:

BATCH_INFO = {
    "jitter_n": 1, 
    "total_batch_n": 1, 
    "jitter_mode": "sampling"
}

GRID_INFO = {
    "grid_mu_start": -4, "grid_mu_end": 4, "grid_mu_step": 5, 
    "grid_sigma_start": 0.001, "grid_sigma_end": 1.8, "grid_sigma_step": 5, 
    "grid_y_start": -4, "grid_y_end": 4, "grid_y_step": 5, 
    "grid_epsilon_start": 0.00000000001, "grid_epsilon_end": 1, "grid_epsilon_step": 5, 
    "hypothetical_obs_grid_n": 10
}


BATCH_GRID_INFO = num_stab_help.get_batch_grid(BATCH_INFO, GRID_INFO)

PRIOR_INFO = {
    "mu_prior": 1,  
    "V_prior": 1, 
    "alpha_prior": 3, 
    "beta_prior": 1, 
    "epsilon": 0.0001, "mu_epsilon":0.0001, "sd_epsilon": 0.0001, 
    "hypothetical_obs_grid_n": 5, 
    "world_EIGs": 0.00001, "max_observation": 500
}
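
For reference, here is one plausible reading of the grid parameters, sketched in R. Treating grid_*_step as the number of grid points (rather than a step size) is an assumption on my part, not something the config confirms.

# Minimal sketch -- ASSUMES grid_*_step counts grid points, not a step size
mu_grid      <- seq(-4, 4, length.out = 5)       # grid_mu_start / _end / _step
sigma_grid   <- seq(0.001, 1.8, length.out = 5)  # grid_sigma_*
epsilon_grid <- seq(1e-11, 1, length.out = 5)    # grid_epsilon_*
mu_grid                                          # -4 -2  0  2  4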

For the stimuli, the scaled embeddings are:

fam: tensor([-0.2870,  0.2874,  0.3432], dtype=torch.float64) test: tensor([-0.4805, -0.9730, -0.2181], dtype=torch.float64)

and the unscaled embeddings are 10x those values:

fam: tensor([-2.870,  2.874,  3.432], dtype=torch.float64) test: tensor([-4.805, -9.730, -2.181], dtype=torch.float64)

Each condition consists of 50 runs of the same simulation. The stimulus sequence is BBBBD.
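
As a quick sanity check in R (values copied from the tensors above), the two stimulus sets differ only by a constant factor of 10:

# Scaled familiarization and test embeddings (copied from above)
scaled_fam  <- c(-0.2870,  0.2874,  0.3432)
scaled_test <- c(-0.4805, -0.9730, -0.2181)

# The unscaled versions are simply 10x the scaled ones
all.equal(10 * scaled_fam,  c(-2.870,  2.874,  3.432))   # TRUE
all.equal(10 * scaled_test, c(-4.805, -9.730, -2.181))   # TRUE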

Below are the comparisons.

library(tidyverse)
library(here)

eig_df <- read_csv(here("scaled_vs_unscaled/eig_df.csv"))
## New names:
## Rows: 25000 Columns: 8
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (4): ...1, stimulus_id, EIG, run_id lgl (4): Look_away, surprisal, kl, prob
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
sim_df <- read_csv(here("scaled_vs_unscaled/sim_df.csv"))
## New names:
## Rows: 250 Columns: 3
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (3): ...1, sample_n, run_id
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
x10_eig_df <- read_csv(here("scaled_vs_unscaled/x10_eig_df.csv"))
## New names:
## Rows: 25000 Columns: 8
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (4): ...1, stimulus_id, EIG, run_id lgl (4): Look_away, surprisal, kl, prob
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
x10_sim_df <- read_csv(here("scaled_vs_unscaled/x10_sim_df.csv"))
## New names:
## Rows: 250 Columns: 3
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (3): ...1, sample_n, run_id
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

First, let's compare the non-missing EIG values. From the plot, we can't see any clear dishabituation in the x10 case.

eig_df %>% 
  mutate("type" = "org") %>% 
  filter(!is.na(stimulus_id)) %>% 
  bind_rows(
    x10_eig_df %>% 
      mutate("type" = "x10") %>% 
      filter(!is.na(stimulus_id))
  ) %>% 
  group_by(type, run_id) %>% 
  mutate(t = row_number()) %>% 
  ggplot(aes(x = stimulus_id, y = EIG)) + 
  geom_point(position = position_jitter(width=.1))+ 
  facet_wrap(~type)
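
To make the missingness explicit, here is a small follow-up (same data frames; this is not part of the original script) that counts how many rows the filter() above drops:

# Count rows with missing stimulus_id per condition (these are dropped before plotting)
bind_rows(
  eig_df %>% mutate(type = "org"),
  x10_eig_df %>% mutate(type = "x10")
) %>% 
  group_by(type) %>% 
  summarise(n_total = n(), n_missing_stimulus = sum(is.na(stimulus_id)))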

We can also check for invalid values (i.e., the number of rows with EIG < 0).

eig_df %>% 
  filter(EIG < 0) %>% 
  count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1     0
x10_eig_df %>% 
  filter(EIG < 0) %>% 
  group_by(stimulus_id) %>% 
  count()
## # A tibble: 1 × 2
## # Groups:   stimulus_id [1]
##   stimulus_id     n
##         <dbl> <int>
## 1           4    50
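
To see where the negative values fall relative to the rest, a hedged follow-up is to summarise EIG by stimulus and condition (exploratory, using only the columns shown above):

# Per-stimulus EIG summaries for both conditions
bind_rows(
  eig_df %>% mutate(type = "org"),
  x10_eig_df %>% mutate(type = "x10")
) %>% 
  filter(!is.na(stimulus_id)) %>% 
  group_by(type, stimulus_id) %>% 
  summarise(min_EIG  = min(EIG, na.rm = TRUE), 
            mean_EIG = mean(EIG, na.rm = TRUE), 
            .groups = "drop")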

Then we can look at the trajectories. The scaled version doesn't look quite right, perhaps due to the prior: there is no clear habituation, but there is very clear dishabituation.

sim_df %>% 
  rename(stimulus_id = `...1`) %>% 
  ggplot(aes(x = stimulus_id, y = sample_n)) + 
  geom_point(alpha = .3) + 
  stat_summary(fun.data = "mean_cl_boot", color = "red") + 
  stat_summary(fun.data = "mean_cl_boot", geom = "line", color = "red") + 
  labs(title = "scaled")

x10_sim_df %>% 
  rename(stimulus_id = `...1`) %>% 
  ggplot(aes(x = stimulus_id, y = sample_n)) + 
  geom_point(alpha = .3) + 
  stat_summary(fun.data = "mean_cl_boot", color = "red") + 
  stat_summary(fun.data = "mean_cl_boot", geom = "line", color = "red") + 
  labs(title = "x10 scaled (unscaled proxy)")

It turns out there was no variation at all in the unscaled version: every background trial has 1 sample and the deviant has 2 samples, presumably because the last trial always produces negative EIG values.

x10_sim_df 
## # A tibble: 250 × 3
##     ...1 sample_n run_id
##    <dbl>    <dbl>  <dbl>
##  1     0        1      0
##  2     1        1      0
##  3     2        1      0
##  4     3        1      0
##  5     4        2      0
##  6     0        1      1
##  7     1        1      1
##  8     2        1      1
##  9     3        1      1
## 10     4        2      1
## # ℹ 240 more rows
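
As a check on the no-variation claim, a minimal tabulation of sample_n per stimulus (using only the columns shown above) should show exactly one value for each background and one for the deviant:

# Expect: sample_n == 1 for backgrounds (stimuli 0-3), sample_n == 2 for the deviant (stimulus 4)
x10_sim_df %>% 
  rename(stimulus_id = `...1`) %>% 
  count(stimulus_id, sample_n)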