Simulating UID negative correlation with UID cost in speaker/listener populations

See https://github.com/benpeloquin7/uid_rsa_conversations/blob/master/models/simulation.py for modeling framework – an extension of Levy (2018) / an alternative framework to the fixed point analysis.

Summary

Negative correlation arises in populations of RSA agents but not non-RSA agents.
Negative correlation appears to increase as a function of population size.
- Population size increase is equivalent to greater RSA iterations (we should control for this across populations sizes.)

fp_rsa_100 <- "../outputs/agent-100-full.csv"           # not permuted conversation partners
fp_no_rsa_100 <- "../outputs/agent-100-no-rsa-full.csv" # not permuted conversation partners
fp_rsa_10 <- "../outputs/agent-10-full.csv"             # 10 agents
fp_no_rsa_10 <- "../outputs/agent-10-no-rsa-full.csv"
fp_rsa_6 <- "../outputs/agent-6-full.csv"               # 6 agents
fp_no_rsa_6 <- "../outputs/agent-6-no-rsa-full.csv"
fp_rsa_4 <- "../outputs/agent-4-full.csv"               # 4 agents
fp_no_rsa_4 <- "../outputs/agent-4-no-rsa-full.csv"
fp_rsa_2 <- "../outputs/agent-2-full.csv"               # 2 agents
fp_no_rsa_2 <- "../outputs/agent-2-no-rsa-full.csv"
fp_rsa_1 <- "../outputs/agent-1-full.csv"               # 1 agents
fp_no_rsa_1 <- "../outputs/agent-1-no-rsa-full.csv"

df <- rbind(
  # read.csv(fp_rsa_100) %>% mutate(use_rsa=TRUE),
  # read.csv(fp_no_rsa_100) %>% mutate(use_rsa=FALSE),
  read.csv(fp_rsa_10) %>% mutate(use_rsa=TRUE),
  read.csv(fp_no_rsa_10) %>% mutate(use_rsa=FALSE),
  read.csv(fp_rsa_6) %>% mutate(use_rsa=TRUE),
  read.csv(fp_no_rsa_6) %>% mutate(use_rsa=FALSE),
  read.csv(fp_rsa_4) %>% mutate(use_rsa=TRUE),
  read.csv(fp_no_rsa_4) %>% mutate(use_rsa=FALSE),
  read.csv(fp_rsa_2) %>% mutate(use_rsa=TRUE),
  read.csv(fp_no_rsa_2) %>% mutate(use_rsa=FALSE),
  read.csv(fp_rsa_1) %>% mutate(use_rsa=TRUE),
  read.csv(fp_no_rsa_1) %>% mutate(use_rsa=FALSE)
)

df_pop <- df %>%
  filter(id=='pop')

## Warning: package 'bindrcpp' was built under R version 3.4.4

df_no_pop <- df %>%
  filter(id!='pop')

Preprocessing

df_stable <- df_pop[!is.na(df_pop$r), ]

# Get max rounds by id, sim_id
max_rounds <- df_stable %>%
  group_by(use_rsa, n_agents, id, sim_id) %>%
  summarise(max_round=max(round))
get_max_round <- function(use_rsa_, n_agents_, id_, sim_id_) {
  subset(max_rounds, (use_rsa==use_rsa_ & n_agents==n_agents_ & id==id_ & sim_id==sim_id_))$max_round
}
# Attempt with mapply -- note this is slow running with we don't restrict to id==pop (for larger speaker populations).
max_rounds_list <- mapply(get_max_round, df_stable$use_rsa, df_stable$n_agents, df_stable$id, df_stable$sim_id)
# Attempt with purrr:pmap
# max_rounds_list <- df_stable %>%
#   select(use_rsa, n_agents, id, sim_id) %>%
#   purrr::pmap(., function(use_rsa, n_agents, id, sim_id) {get_max_round(use_rsa, n_agents, id, sim_id)}) %>%
#   unlist()
assertthat::are_equal(length(max_rounds_list), nrow(df_stable))

## [1] TRUE

# Assign max round
df_stable$max_round <- max_rounds_list
df_stable <- df_stable %>%
  mutate(stable=max_round==n_rounds-1,
         is_final_round=max_round==round)

`r` Trajectories

df_stable %>%
  filter(id=='pop') %>%
  mutate(use_rsa=factor(ifelse(use_rsa, 'rsa', 'no-rsa'), levels=c('rsa', 'no-rsa'))) %>%
  ggplot(aes(x=round, y=r, lty=use_rsa, col=as.factor(sim_id))) +
    geom_line(alpha=0.6) +
    ylim(-1, 1) +
    theme_classic() +
    theme(legend.position='none') +
    facet_wrap(n_agents~use_rsa, nrow=5)

`that-rate` Trajectories

df_stable %>%
  filter(id=='pop') %>%
  mutate(use_rsa=factor(ifelse(use_rsa, 'rsa', 'no-rsa'), levels=c('rsa', 'no-rsa'))) %>%
  ggplot(aes(x=round, y=that_rate, col=as.factor(sim_id))) +
    geom_line(alpha=0.6) +
    ylim(0, 1) +
    theme_classic() +
    theme(legend.position='none') +
    facet_wrap(n_agents~use_rsa, nrow=5)

`that-rate` distr

df_stable %>%
  filter(is_final_round, id=='pop') %>%
  mutate(use_rsa=factor(ifelse(use_rsa, 'rsa', 'no-rsa'), levels=c('rsa', 'no-rsa'))) %>%
  ggplot(aes(x=that_rate,y=..density..)) + 
      geom_histogram(bins=32) + 
      scale_x_continuous(limits=c(-0.05,1.05)) +
      ylab("Probability density") +
      xlab(expression(paste("Marginal frequency of optional marker ", t))) +
      theme_classic() +
      facet_wrap(n_agents~use_rsa, nrow=5)

## Warning: Removed 10 rows containing missing values (geom_bar).

`r` distr

df_stable %>% 
  filter(is_final_round, id=='pop') %>%
  mutate(use_rsa=factor(ifelse(use_rsa, 'rsa', 'no-rsa'), levels=c('rsa', 'no-rsa'))) %>%
  ggplot(aes(x=r, y=..density.., fill=as.factor(n_agents))) + 
    geom_vline(xintercept=0.0, lty=2, alpha=0.3) +
    geom_histogram(bins=42, alpha=0.9) + 
    scale_x_continuous(limits=c(-1.05,1.05)) +
    ylab("Probability density") +
    xlab("Pearson correlation between\nphrase onset & t probabilities") + 
    theme_classic() +
    facet_grid(n_agents~use_rsa)

Significant diffs?

this is actually really interesting – the effect is not a fun

df_stable %>%
  filter(is_final_round) %>%
  group_by(use_rsa, n_agents) %>%
  summarise(mean_r=mean(r),
            sd_r=sd(r),
            n=n(),
            ci_hi=mean_r+qnorm(0.975)*sd_r/sqrt(n),
            ci_lo=mean_r+qnorm(0.025)*sd_r/sqrt(n)) %>%
  ggplot(aes(x=as.factor(n_agents), y=mean_r, fill=use_rsa)) +
    geom_bar(stat='identity', position='dodge') +
    geom_errorbar(aes(ymin=ci_lo, ymax=ci_hi), width=0.2, position=position_dodge(0.9)) +
    ylim(-0.3, 0.1) +
    theme_classic()

df_stable %>%
  filter(is_final_round) %>%
  mutate(use_rsa=factor(ifelse(use_rsa, 'rsa', 'no-rsa'), levels=c('rsa', 'no-rsa'))) %>%
  ggplot(aes(x=as.factor(n_agents), y=r, col=use_rsa, fill=use_rsa)) +
    geom_hline(yintercept=0.0, lty=2, alpha=0.2) +
    geom_point(aes(col=use_rsa), alpha=0.4) +
    # geom_boxplot(alpha=0) +
    geom_violin(alpha=0.3) +
    
    coord_flip() +
    theme_classic() +
    facet_wrap(~use_rsa)

Convergence and entropy

Average lexicon entropy decreases across RSA conversations.

df_pop %>%
  group_by(use_rsa, n_agents, round) %>%
  summarise(mean_entropy=mean(entropy),
            sd_entropy=sd(entropy),
            n=n(),
            ci_hi=mean_entropy+qnorm(0.975)/sqrt(n),
            ci_lo=mean_entropy+qnorm(0.025)/sqrt(n)) %>%
  ungroup() %>%
  mutate(use_rsa=factor(ifelse(use_rsa, 'rsa', 'no-rsa'), levels=c('rsa', 'no-rsa'))) %>%
  ggplot(aes(x=round, y=mean_entropy, col=use_rsa)) +
    geom_line() +
    geom_errorbar(aes(ymin=ci_lo, ymax=ci_hi), alpha=0.3) +
    theme_classic() +
    facet_wrap(~n_agents)

df_stable %>%
  filter(is_final_round) %>%
  mutate(use_rsa=factor(ifelse(use_rsa, 'rsa', 'no-rsa'), levels=c('rsa', 'no-rsa'))) %>%
  ggplot(aes(x=as.factor(n_agents), y=entropy, fill=use_rsa)) +
    geom_boxplot() +
    theme_classic()

Decerease in lexicon entropy.

df_stable %>%
  filter(is_final_round, id=='pop')  %>%
  mutate(use_rsa=factor(ifelse(use_rsa, 'rsa', 'no-rsa'), levels=c('rsa', 'no-rsa'))) %>%
  select(use_rsa, n_agents, sim_id, entropy) %>%
  spread(use_rsa, entropy) %>%
  mutate(diff=rsa-`no-rsa`) %>%
  group_by(n_agents) %>%
  ggplot(aes(x=as.factor(n_agents), y=diff, fill=as.factor(n_agents))) +
    geom_hline(yintercept=0, lty=2, alpha=0.4, col='red')  +
    geom_violin(aes(fill=as.factor(n_agents)), alpha=0.4, trim=FALSE) +
    geom_point(alpha=0.5) +
    coord_flip() +
    theme_classic()

## Warning: Removed 8 rows containing non-finite values (stat_ydensity).

## Warning: Removed 8 rows containing missing values (geom_point).

Due to lower overall entropy individual speakers lexicons converge more closely, perhaps as a function of populations size.

df_no_pop %>% 
  group_by(use_rsa, n_agents, round) %>%
  summarise(mean_KL=mean(KL),
            sd_KL=sd(KL),
            n=n(),
            ci_hi=mean_KL+qnorm(0.975)/sqrt(n),
            ci_lo=mean_KL+qnorm(0.025)/sqrt(n)) %>%
  ungroup() %>%
  mutate(use_rsa=factor(ifelse(use_rsa, 'rsa', 'no-rsa'), levels=c('rsa', 'no-rsa'))) %>%
  ggplot(aes(x=round, y=mean_KL, col=as.factor(n_agents))) +
    geom_line() +
    # geom_errorbar(aes(ymin=ci_lo, ymax=ci_hi)) +
    ylim(0, 0.3) +
    theme_classic() +
    facet_wrap(~use_rsa)

B-prob example (one simulation). What should aggregate statistic be?

The population converges on a shared distribution over relative clauses (worth thinking as need-probabilities).

df_no_pop %>%
  filter(sim_id %in% c(10)) %>%
  ggplot(aes(x=round, y=current_B_prob, col=as.factor(paste0(id, '-', sim_id)))) +
    geom_line() +
    theme_classic() +
    facet_grid(use_rsa~n_agents)

t-prob example (one simulation). What should aggregate statistic be?

However, there is still lots of individual-level variation in terms of optinal marking for RSA agents…

df_no_pop %>%
  filter(sim_id %in% c(10)) %>%
  ggplot(aes(x=round, y=current_t_prob, col=as.factor(paste0(id, '-', sim_id)))) +
    geom_line() +
    theme_classic() +
    facet_grid(use_rsa~n_agents)

Variance within a population optional-t usage increases for RSA agents.

How to interpret this? As the conversations proceed the within population variance of optional-t usage increases for RSA speakers and declines for non-rsa speakers.

df_no_pop %>%
  mutate(use_rsa=factor(ifelse(use_rsa, 'rsa', 'no-rsa'), levels=c('rsa', 'no-rsa'))) %>%
  group_by(use_rsa, n_agents, id, round) %>%
  summarise(mean_current_t_probs=mean(current_t_prob),
            sd_current_t_probs=sd(current_t_prob)^2) %>%
  group_by(use_rsa, n_agents, round) %>%
  summarise(mean_sd_current_t_probs=mean(sd_current_t_probs),
            sd_sd_current_t_probs=sd(sd_current_t_probs),
            n=n(),
            ci_lo=mean_sd_current_t_probs+qnorm(0.025)*sd_sd_current_t_probs/sqrt(n),
            ci_hi=mean_sd_current_t_probs+qnorm(0.975)*sd_sd_current_t_probs/sqrt(n)) %>%
  ggplot(aes(x=round, y=mean_sd_current_t_probs, col=as.factor(n_agents))) +
    geom_errorbar(aes(ymin=ci_lo, ymax=ci_hi), alpha=0.4) +
    geom_line() +
    theme_classic() +
    facet_grid(use_rsa~., scales="free")

## Warning: Removed 200 rows containing missing values (geom_errorbar).

Samples

Note the facet numbers are not meaningful… should show this another way.

sims <- sample(unique(df_stable$sim_id), min(length(unique(df_stable$sim_id)), 16))

df_stable %>%
  filter(sim_id %in% sims, id=='pop') %>%
  mutate(use_rsa=factor(ifelse(use_rsa, 'rsa', 'no-rsa'), levels=c('rsa', 'no-rsa'))) %>%
  ggplot(aes(x=round, y=that_rate, lty=use_rsa, col=as.factor(n_agents))) +
    geom_line(alpha=0.9) +
    ylim(0, 1) +
    ggtitle("Sample that-rate trajectories") +
    facet_wrap(~sim_id) +
    theme_classic()

df_stable %>%
  filter(sim_id %in% sims, id=='pop') %>%
  mutate(use_rsa=factor(ifelse(use_rsa, 'rsa', 'no-rsa'), levels=c('rsa', 'no-rsa'))) %>%
  ggplot(aes(x=round, y=r, lty=use_rsa, col=as.factor(n_agents))) +
    geom_line(alpha=0.9) +
    ylim(-1, 1) +
    ggtitle("Sample r trajectories") +
    facet_wrap(~sim_id) +
    theme_classic()

summary(lm(r~as.factor(n_agents)+use_rsa, data=df_stable))

## 
## Call:
## lm(formula = r ~ as.factor(n_agents) + use_rsa, data = df_stable)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.86563 -0.10495  0.00929  0.12117  0.89316 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            0.038476   0.001552   24.79   <2e-16 ***
## as.factor(n_agents)2  -0.023040   0.002020  -11.40   <2e-16 ***
## as.factor(n_agents)4  -0.030702   0.001999  -15.36   <2e-16 ***
## as.factor(n_agents)6  -0.048918   0.001995  -24.52   <2e-16 ***
## as.factor(n_agents)10 -0.039181   0.001990  -19.69   <2e-16 ***
## use_rsaTRUE           -0.171624   0.001244 -137.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1912 on 94903 degrees of freedom
## Multiple R-squared:  0.1745, Adjusted R-squared:  0.1744 
## F-statistic:  4012 on 5 and 94903 DF,  p-value: < 2.2e-16

Stable optionality plot

# R. Levy's preprocessing
# dat$stable <- with(dat,thatrate > 0.001 & thatrate < 0.999)
# dat <- subset(dat, ! (k==1.0 & c==0.0))
# dat.summary <- dat %>% group_by(k,c) %>%
#   dplyr:::summarise(stable=mean(stable),r=mean(r))

# This preprocessing is only valid for stable optionality plot
df_preprocessed <- df %>%
  filter(al_round, id!='pop') %>%
  filter(k != 1.0, c != 0.0) %>%
  group_by(id, k, c) %>%
  summarise(stable=mean(is_stable), r=mean(r))

stables <- df %>% 
  filter(is_final_round) %>%
  mutate(stable=ifelse(that_rate > 0.1 & that_rate < 0.9, TRUE, FALSE)) %>%
  filter(stable)
stables <- stables$sim_id
# Plot all trajectories

df_preprocessed %>%
  filter(id!='pop') %>%
  ggplot(aes(k,c)) + 
    geom_tile(aes(fill=r),colour="white") +
    labs(y=expression(paste("String length cost parameter ", c)), fill="stable\noptionality\nrate") +
    theme_classic() +
    scale_x_continuous(name=expression(paste("Nonuniformity penalization parameter ",k)),
                       breaks=seq(0,2,by=0.2))

Analysis

Ben

7/11/2018

Simulating UID negative correlation with UID cost in speaker/listener populations

Summary

`r` Trajectories

`that-rate` Trajectories

`that-rate` distr

`r` distr

Significant diffs?

Convergence and entropy

Samples

Stable optionality plot

Analysis

Ben

7/11/2018

Simulating UID negative correlation with UID cost in speaker/listener populations

Summary

r Trajectories

that-rate Trajectories

that-rate distr

r distr

Significant diffs?

Convergence and entropy

Samples

Stable optionality plot

`r` Trajectories

`that-rate` Trajectories

`that-rate` distr

`r` distr