d.lit <- read.csv(paste(local.path,
"experiment/data_analysis/data/literalSemantics_wNeg.csv",
sep = ""))
Summarize behavioral data (in terms of number of “yes”es)
d.lit.summary <- d.lit %>%
group_by(state, utterance) %>%
summarize(k = sum(judgment),
n = n())
literalSemanticsModel <- '
var n = data[0]["n"]
var k = data[0]["k"]
var literalSemantics = function(){
var theta = uniform(0,1)
observe( Binomial({n: n, p: theta}), k )
return {theta: theta}
}
'
Run model, for all sites, states, utterances
# sites <- levels(d.lit.summary$site)
states <- levels(factor(d.lit.summary$state))
utterances <- levels(d.lit.summary$utterance)
litSemantics.results <- data.frame()
for (st in states){
for (utt in utterances){
d.lit.pass <- d.lit.summary %>%
filter((state == st) & (utterance == utt))
rs <- webppl(literalSemanticsModel,
data = d.lit.pass,
data_var = "data",
inference_opts = list(
method = "rejection",
samples = 10
),
model_var = "literalSemantics",
output_format = "samples")
rs.summary <- rs %>%
# summarize(MAP = estimate_mode(theta),
# cred_low = hdi_lower(theta),
# cred_upper = hdi_upper(theta)) %>%
mutate(state = st, utterance = utt)
litSemantics.results <- bind_rows(litSemantics.results,
rs.summary)
print(utt)
}
print(st)
}
# write.csv(litSemantics.results,paste(local.path,
# "model/data/literal_semantics_wNeg_10samples_noMAP.csv",sep=""), # CHANGE FILE NAME AS NEEDED
# row.names=FALSE)
Load literal semantics BDA results
# litSemantics.results <- read.csv(paste(local.path, "model/data/literal_semantics_wNeg_10000samples.csv",
litSemantics.results <- read.csv(paste(local.path, "model/data/literal_semantics_wNeg_10000samples_noMAP.csv",
# litSemantics.results <- read.csv(paste(local.path, "model/data/literal_semantics_wNeg_10samples_noMAP.csv",
sep = ""))
litSemantics.results.org <- litSemantics.results %>%
mutate(positivity = factor(as.numeric(grepl("yes", utterance)),
levels = c(0, 1),
labels = c("negative","positive"))) %>%
mutate(utterance = substring(utterance, 5)) %>%
mutate(utterance = ordered(utterance, levels = c("terrible", "bad", "okay", "good", "amazing")))
ggplot(data=litSemantics.results.org,
aes(x=state, y=MAP, col=utterance, group=utterance)) +
geom_line() +
facet_grid(positivity~utterance) +
geom_errorbar(aes(ymin=cred_low,ymax=cred_upper, width=.1))
d.utterance <- read.csv(paste(
local.path,
"experiment/data_analysis/data/speaker.csv"
, sep="")
) %>%
filter(utterance != "NA_NA")
d <- d.utterance %>%
separate(utterance, into = c("positivity", "utterance"), sep = "_") %>%
mutate(true_state = as.factor(true_state),
goal = as.factor(goal),
positivity = as.factor(positivity),
utterance = as.factor(utterance)
)
ms2 <- d %>%
filter(!is.na(positivity), !is.na(utterance)) %>% # why is there NA?
group_by(true_state, goal) %>%
summarise(n.total=n())
ms3 <- d %>%
filter(!is.na(positivity), !is.na(utterance)) %>% # why is there NA?
group_by(true_state, goal, positivity, utterance) %>%
summarize(n = n())
ms <- left_join(ms2, ms3) %>%
group_by(true_state, goal, positivity, utterance) %>%
summarize(mean = n / n.total,
ci_lower = binom.bayes(n, n.total)$lower,
ci_upper = binom.bayes(n, n.total)$upper)
ms_fake <- cbind(expand.grid(true_state=levels(ms$true_state),goal=levels(ms$goal),positivity=levels(ms$positivity), utterance=levels(ms$utterance)), mean=NA, ci_lower=NA, ci_upper=NA)
ms.all <- rbind(data.frame(ms), data.frame(ms_fake))
levels(ms.all$true_state) <- c("1 heart", "2 hearts", "3 hearts", "4 hearts", "5 hearts")
levels(ms.all$goal) <- c("want both", "want to be informative", "want to make listener feel good")
levels(ms.all$positivity) <- c("negation", "no negation")
ggplot(data=ms.all, aes(x=positivity, y=mean, fill=utterance)) +
geom_bar(stat="identity", position=position_dodge()) +
facet_grid(goal~true_state) +
xlab("no negation (It was ~) vs negation (It wasn't ~) ") +
ylab("proportion chosen") +
# ggtitle("What would the speaker say given their goals?") +
geom_errorbar(aes(ymin=ci_lower,ymax=ci_upper),position="dodge") +
geom_hline(yintercept=.1, lty=2) +
scale_fill_discrete(guide = guide_legend(title = "word"))
Polite RSA: production model
pRSA <- '
var utterances = ["yes_terrible","yes_bad","yes_okay","yes_good","yes_amazing",
"not_terrible","not_bad","not_okay","not_good","not_amazing"
// ,"nullUtt"
];
// var states = ["1","2","3","4","5"];
var states = [1,2,3,4,5];
var statePrior = function(){
return uniformDraw(states);
};
var isNegation = function(utt){
return (utt.split("_") == "not")
};
var cost_yes = 1;
var uttCosts = function(cost_neg){
map(function(u) {return isNegation(u) ? Math.exp(-cost_neg) : Math.exp(-cost_yes)},
utterances)
}
var utterancePrior = function(cost_neg){
// return uniformDraw(utterances)
return utterances[discrete(uttCosts(cost_neg))];
};
// model parameters
// var alpha = 1.25;
// var speakerOptimality = 10;
// measured in Experiment 1
var literalSemantics = dataFromR.literalSemantics;
//display(literalSemantics)
var litSemanticsPosteriorObj = _.object(map(function(u){
return [u, _.object(map(function(s){
[s, _.pluck(_.where(literalSemantics, {state: s, utterance: u}), "theta")]
// var lst = _.pluck(_.where(literalSemantics, {state: s, utterance: u}), "theta")
// [s, lst]
}, states))]
}, utterances))
//display(litSemanticsPosteriorObj)
// e.g. {"amazing": { 1: [w1, w2, ... , wn], 2: [ ... ], ... }, "terrible": {1: [, ...]} }
var honestyWeights = [1,1,1,1,1]
var kindnessWeights = [1,1,1,1,1]
//var meaning = function(words, state){
// return flip(literalSemantics[words][state]);
//};
//display(meaning("yes_terrible", "1"))
var meaning = function(literalSemantics, words, state){
return flip(literalSemantics[words][state]);
};
var listener0 = cache(function(literalSemantics, utterance) {
Infer({method: "enumerate"}, function(){
var state = statePrior();
var m = meaning(literalSemantics, utterance, state);
condition(m);
//display(state)
return state;
});
}, 10000);
var speaker1 = cache(function(literalSemantics, exptCondInfo, rsaParameters) {
Infer({method: "enumerate"}, function(){
var state = exptCondInfo.state;
var speakerGoals = exptCondInfo.goalWeights;
var utterance = utterancePrior(rsaParameters.cost);
var speakerOptimality = rsaParameters.speakerOptimality;
var alpha = rsaParameters.alpha;
//display("so = " + speakerOptimality)
//display("alpha = " + alpha)
var L0 = listener0(literalSemantics, utterance);
var epistemicUtility = L0.score(state);
var socialUtility = expectation(L0, function(s){return alpha*s});
var eUtility = speakerGoals.honesty*epistemicUtility;
var sUtility = speakerGoals.kindness*socialUtility;
var speakerUtility = eUtility+sUtility;
factor(speakerOptimality*speakerUtility);
//display(utterance)
return utterance;
})
}, 10000)
//var listener1 = cache(function(utterance, rsaParameters) {
// Infer({method: "enumerate"}, function(){
// var speakerGoals = {
// honesty: [0.1, 0.3, 0.5, 0.7, 0.9][discrete(honestyWeights)],
// kindness: [0.1, 0.3, 0.5, 0.7, 0.9][discrete(kindnessWeights)]
// }
//
// var state = statePrior()
//
// var S1 = speaker1(state, speakerGoals, rsaParameters)
//
// observe(S1, utterance)
//
// return {
// state: state,
// goals: speakerGoals
// }
// })
//}, 10)
//
//var speaker2 = cache(function(exptCondInfo, rsaParameters) {
// Enumerate(function(){
// var state = exptCondInfo.state;
// var intendedGoals = exptCondInfo.goalWeights;
// var utterance = utterancePrior()
//
// var L1 = listener1(utterance, rsaParameters)
//
// factor(L1.score({"state":state, "goals":intendedGoals}))
// return utterance
// display(d)
//
// })
//}, 10)
'
Data analysis model
dataAnalysisModel <- '
// foreach helper function
var foreach = function(fn, lst) {
var foreach_ = function(i) {
if (i < lst.length) {
fn(lst[i]);
foreach_(i + 1);
}
};
foreach_(0);
};
//
var data = dataFromR.data;
var goals = _.uniq(_.pluck(data, "goal"));
var states = _.uniq(_.pluck(data, "true_state"));
// var utterances = _.uniq(_.pluck(data, "utterance"));
var dataAnalysis = function(){
var litSemantics = _.object(map(function(u){
return [u, _.object(map(function(s){
[s, uniformDraw(litSemanticsPosteriorObj[u][s])]
}, states))]
}, utterances))
//display(litSemantics)
var RSAparameters = {
speakerOptimality: uniformDrift({a: 0, b: 20, width:2}),
alpha: uniformDrift({a: 0, b: 5, width:0.5}),
cost: uniformDrift({a: 1, b: 5, width:0.25})
//literalSemantics: litSemantics
};
var goalWeightsAndPostPred = map(function(goal){
var goalWeights = {
honesty: uniformDrift({a: 0, b: 1, width:0.2}),
kindness: uniformDrift({a: 0, b: 1, width:0.2})
//honesty: uniformDraw([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
//kindness: uniformDraw([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
}
var postPred = map(function(state){
var utteranceData = _.pluck(_.where(data, {true_state: state, goal: goal}), "utterance");
var exptConditionInfo = {
state: state,
utterance: false,
goalWeights: goalWeights
};
var literalSemantics = litSemantics;
var RSApredictions = speaker1(literalSemantics, exptConditionInfo, RSAparameters);
//RSApredictions.support()
// var linkedRSA = linkingFunction(RSApredictions);
mapData({data: utteranceData},
function(d){
// display(RSApredictions.score(d))
//display("d = " + d + " ___ scr = " + RSApredictions.score(d) )
observe(RSApredictions, d)
});
// mapData({data: utteranceData}, function(d){ observe(linkedRSA, d) });
var postSupport = RSApredictions.support(); // all utterances in the posterior
var postPredictive = map(function(u){
return {
key: "posteriorPredictive",
goal: goal,
state: state,
utt: u,
val: Math.exp(RSApredictions.score(u))
}
}, postSupport)
return postPredictive
}, states)
return [postPred,
{key: "weightHonest", goal: goal, state: "NA", utt: "NA", val: goalWeights.honesty},
{key: "weightKind", goal: goal, state: "NA", utt: "NA", val: goalWeights.kindness}
// {key: "weightMean", goal: goal, state: "NA", utt: "NA", val: goalWeights.meanness}
]
}, goals)
var returnList = _.flatten([goalWeightsAndPostPred,
//litSemantics,
{key: "speakerOptimality", goal: "NA", utt: "NA", state: "NA", val: RSAparameters.speakerOptimality},
{key: "alpha", goal: "NA", utt: "NA", state: "NA", val: RSAparameters.alpha},
{key: "cost", goal: "NA", utt: "NA", state: "NA", val: RSAparameters.cost}
])
var returnObj = _.object(map(function(i){
[i.key + "_" + i.goal + "_" + i.state + "_" + i.utt, i.val]
}, returnList))
return returnObj
}
'
Run Full BDA model
bda.utterance.results <- data.frame()
fullModel <- paste(pRSA, dataAnalysisModel, sep = "\n")
# for (si in sites) {
# site.data <- filter(d.state, site == si)
# litSemantics <- filter(litSemantics.results, site == si)
# litSemantics.toPass <- as.list(litSemantics %>%
# select(state, utterance, MAP) %>%
# litSemantics.toPass <- as.list(litSemantics.results %>%
# select(state, utterance, MAP) %>%
# spread(utterance, MAP))
# litSemantics.toPass <- as.list(litSemantics.results) %>%
# select(state, utterance, theta) %>%
# spread(utterance, theta))
litSemantics.toPass <- litSemantics.results
dataToWebPPL <- list(literalSemantics = litSemantics.toPass,
# data = site.data)
data = d.utterance)
# toJSON(as.list(litSemantics.toPass), pretty = T)
# rsa.output <- webppl(pRSA,
# data = dataToWebPPL,
# data_var = "dataFromR")
#
bda.utterance.results <- webppl(fullModel,
data = dataToWebPPL,
data_var = "dataFromR",
inference_opts = list(method = "MCMC",
samples = 400,
burn = 200,
verbose = TRUE),
model_var = "dataAnalysis",
output_format = "samples",
chains = 2,
cores = 2)
#dataToWebPPL
summary(bda.utterance.results$speakerOptimality_NA_NA_NA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5544 0.8661 2.3360 2.6470 4.7080 5.6040
summary(bda.utterance.results$alpha_NA_NA_NA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.610 1.918 2.703 2.639 3.022 3.887
bda.state.tidy <- bda.utterance.results %>%
select(contains("posteriorPredictive")) %>%
gather(key, val) %>%
mutate(key = gsub("value.", "", key)) %>% # for mht
separate(key, into = c("param", "goal", "state", "positivity", "utterance")) %>%
mutate(utterance = factor(utterance, levels = c("terrible", "bad", "okay", "good", "amazing")),
positivity = factor(positivity, labels = c("neg", "no_neg")),
positivity = factor(positivity, levels = c("no_neg", "neg"))) %>%
group_by(goal, state, positivity, utterance) %>%
# summarise(val = mean(val))
summarize(MAP = estimate_mode(val),
ci_lower = hdi_upper(val),
ci_upper = hdi_lower(val))
# multi_boot_standard(column = "val") %>%
# mutate(val = mean)
ggplot(data=bda.state.tidy, aes(x=positivity, y=MAP, fill=utterance)) +
geom_bar(stat="identity", position=position_dodge()) +
facet_grid(goal~state) +
xlab("no neg (it was ~) vs neg (it wasn't ~) ") +
ylab("proportion chosen") +
ggtitle("What would the speaker say given their goals?") +
geom_errorbar(aes(ymin=ci_lower,ymax=ci_upper),position="dodge") +
geom_hline(yintercept=.1, lty=2)
bda.state.tidy2 <- bda.utterance.results %>%
select(contains("weight")) %>%
gather(key, val) %>%
# mutate(key = gsub("value.", "", key)) %>% # for mht
separate(key, into = c("param", "goal", "state", "utterance")) %>%
ggplot(., aes(x = val))+
geom_histogram(binwidth=0.01)+
facet_grid(goal~param)
# group_by(goal, param) %>%
# summarize(MAP = estimate_mode(val),
# cred_upper = hdi_upper(val),
# cred_lower = hdi_lower(val))
bda.state.tidy2
bda.utterance.results %>%
# select(value.speakerOptimality_NA_NA_NA, value.alpha_NA_NA_NA, value.cost_NA_NA_NA) %>%
select(speakerOptimality_NA_NA_NA, alpha_NA_NA_NA, cost_NA_NA_NA) %>%
gather(key, val) %>%
# mutate(key = gsub("value.", "", key)) %>% # for mht
separate(key, into = c("param", "goal", "state", "utterance")) %>%
ggplot(., aes(x = val))+
geom_histogram()+
facet_wrap(~param)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.