Fit vanilla RSA models to the data from prior and levels series experiments.
library(binom)
library(magrittr)
rm(list=ls())
source("../analysis/useful_dplyr.R")
## Loading required package: Matrix
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
##
## Attaching package: 'tidyr'
##
## The following object is masked from 'package:Matrix':
##
## expand
##
## The following object is masked from 'package:magrittr':
##
## extract
source("agents.R")
source("util.R")
source("matrices.R")
knitr::opts_chunk$set(fig.width=12, cache=TRUE,
warning=FALSE, message=FALSE)
library(dplyr, warn.conflicts = FALSE, quietly=TRUE)
library(tidyr, warn.conflicts = FALSE, quietly=TRUE)
Set up helper function to run model.
do.model <- function(matrix,
model = "LSLS",
query_feature,
query_target,
prior = NULL,
debug = FALSE) {
mat <- eval(parse(text=as.character(matrix)))
mod <- eval(parse(text=paste(as.character(model),
"(mat, prior = prior)",
sep="")))
if (debug) {
print(model)
print(matrix)
print(prior)
print(mod)
}
return(mod[as.character(query_feature), as.character(query_target)])
}
Note: levels_mod.csv
has the levels of the “levels” complex experiment manually modified, because while we are using terms like “target” here as an absolute reference, it was used as a relative reference in that experiment design. Meaning, “target” for level 0 was actually what we refer to here as “logical,” and “target” for level 1 was “foil” here.
priors_mod.csv
just has some levels renamed forso they are terser and more informative.
d.prior <- read.csv("data/prior_mod.csv")
d.levels <- read.csv("data/levels_mod.csv")
Make sure that each experiment is tagged with its appropriate matrix.
d.raw <- bind_rows(d.prior, d.levels) %>%
mutate(matrix = ifelse(expt == "color" | expt == "prior" |
expt == "lang" | expt == "baserate", "simple", expt))
split priors and inference, and make sure that each inference is tagged with its prior. Some of this has to be manual AFAIK.
prior <- filter(d.raw, question == "prior") %>%
mutate(prior = str_c(cond, "-", expt)) %>%
select(-cond, -expt, -matrix, -question)
inference <- filter(d.raw, question == "inference") %>%
mutate(prior = str_c(cond, "-", expt),
query = "glasses")
inference$prior[inference$expt == "complex"] <- "0-complex"
inference$prior[inference$expt == "oddman"] <- "prior-oddman"
inference$prior[inference$expt == "twins"] <- "prior-twins"
inference$prior[inference$expt == "simple"] <- "prior-prior"
inference$prior[inference$prior == "none-color"] <- "prior-prior"
inference$query[inference$cond == "0" & inference$matrix == "simple"] <- "hat"
inference$query[inference$cond == "0" & inference$matrix == "complex"] <- "hat"
inference$query[inference$cond == "1" & inference$matrix == "complex"] <- "mustache"
inference$query[inference$cond == "2" & inference$matrix == "complex"] <- "glasses"
inference$query[inference$cond == "twin" & inference$matrix == "twins"] <- "hat"
inference$query[inference$cond == "uniform" & inference$matrix == "twins"] <- "mustache"
Get CIs, etc.
Critically, this is where we add \(\epsilon\), a smoothing parameter on the prior.
inference %<>%
select(-question) %>%
gather(object, count, foil, logical, target) %>%
mutate(count = ifelse(is.na(count),
0, count)) %>%
group_by(expt, cond) %>%
mutate(p = count / sum(count),
cil = binom.bayes(count, sum(count))$lower,
cih = binom.bayes(count, sum(count))$upper)
eps <- 1
prior %<>%
mutate(normalizer = foil + target + logical + 3*eps,
foil = (foil + eps) / normalizer,
target = (target + eps) / normalizer,
logical = (logical + eps) / normalizer) %>%
select(foil, logical, target, prior)
Now run models by merging in priors.
## add prior on every row
d <- left_join(inference, prior)
models <- factor(c("L0","LS","LSLS","LSLSLS"))
md <- data.frame()
for (i in 1:length(models)) {
md.new <- d %>%
group_by(expt, cond, object) %>%
mutate(model = as.character(models[i]),
prediction = do.model(matrix = matrix,
model = model,
query_feature = query,
query_target = object,
prior = c(foil,
target,
logical)))
md <- bind_rows(md, md.new)
}
md$prior <- "prior"
for (i in 1:length(models)) {
md.new <- d %>%
group_by(expt, cond, object) %>%
mutate(model = as.character(models[i]),
prediction = do.model(matrix = matrix,
model = model,
query_feature = query,
query_target = object,
prior = c(.333, .333, .333)))
md.new$prior <- "uniform"
md <- bind_rows(md, md.new)
}
Investigate/debug sample model runs
this_row <- d[d$expt == "color" & d$object == "target" & d$cond == "logical",]
this_row %>% data.frame
## cond expt matrix prior query object count p
## 1 logical color simple logical-color glasses target 26 0.5416667
## cil cih foil logical target
## 1 0.4028753 0.6778426 0.1481481 0.5555556 0.2962963
with(this_row,
do.model(matrix = matrix,
model = "LSLS",
query_feature = query,
query_target = object,
prior = c(foil, target, logical),
debug=TRUE))
## [1] "LSLS"
## [1] "simple"
## [1] 0.1481481 0.2962963 0.5555556
## foil target logical
## hat 0 0.0000000 1.0000000
## glasses 0 0.6808511 0.3191489
## [1] 0.6808511
First, all points.
ggplot(md, aes(x = prediction, y = p, col = expt, pch = object)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
geom_text(aes(label = cond, x = prediction + .02), size=3, hjust=0) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_grid(prior~model) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2)
Plot just those with non-0/1 values.
ggplot(filter(md, prediction > 0 & prediction < 1),
aes(x = prediction, y = p, col = expt, pch = object)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
geom_text(aes(label = cond, x = prediction + .02), size=3, hjust=0) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_grid(prior~model) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2)
Stats.
md %>%
group_by(prior, model) %>%
summarise(r_pearson = cor.test(p, prediction, method = "pearson")$estimate,
r_spearman = cor.test(p, prediction, method = "spearman")$estimate,
MSE = mean((prediction-p)^2))
## Source: local data frame [8 x 5]
## Groups: prior
##
## prior model r_pearson r_spearman MSE
## 1 prior L0 0.7357369 0.7817368 0.045675270
## 2 prior LS 0.7787035 0.7988274 0.040124794
## 3 prior LSLS 0.9202643 0.9115726 0.015090792
## 4 prior LSLSLS 0.9518606 0.9284745 0.009782361
## 5 uniform L0 0.7357369 0.7817368 0.045675270
## 6 uniform LS 0.9139893 0.9218061 0.015929303
## 7 uniform LSLS 0.9517984 0.9189843 0.010112532
## 8 uniform LSLSLS 0.9503321 0.9189843 0.012607572
md %>%
filter(prediction > 0 & prediction < 1) %>%
group_by(prior, model) %>%
summarise(r_pearson = cor.test(p, prediction, method = "pearson")$estimate,
r_spearman = cor.test(p, prediction, method = "spearman")$estimate,
MSE = mean((prediction-p)^2))
## Source: local data frame [8 x 5]
## Groups: prior
##
## prior model r_pearson r_spearman MSE
## 1 prior L0 0.1423435 0.1718201 0.07124837
## 2 prior LS 0.4091706 0.3970011 0.06220902
## 3 prior LSLS 0.8545744 0.8412276 0.02143936
## 4 prior LSLSLS 0.9127571 0.9059698 0.01279420
## 5 uniform L0 0.1423435 0.1718201 0.07124837
## 6 uniform LS 0.9096561 0.9020929 0.02280494
## 7 uniform LSLS 0.9088052 0.8893813 0.01333191
## 8 uniform LSLSLS 0.9032426 0.8893813 0.01739526