Literal semantics data

d.lit <- read.csv(paste(local.path,
                        "experiment/data_analysis/data/literalSemantics_wNeg.csv",
                        sep = ""))

Summarize behavioral data (in terms of number of “yes”es)

d.lit.summary <- d.lit %>%
  group_by(state, utterance) %>%
  summarize(k = sum(judgment),
            n = n())

Model

literalSemanticsModel <- '
var n = data[0]["n"]
var k = data[0]["k"]

var literalSemantics = function(){
    var theta = uniform(0,1)
  observe( Binomial({n: n, p: theta}), k )
  return {theta: theta}
}
'

Run model, for all sites, states, utterances

# sites <- levels(d.lit.summary$site)
states <- levels(factor(d.lit.summary$state))
utterances <- levels(d.lit.summary$utterance)

litSemantics.results <- data.frame()

  for (st in states){
    for (utt in utterances){
      
      d.lit.pass <- d.lit.summary %>% 
        filter((state == st) & (utterance == utt))
      
      rs <- webppl(literalSemanticsModel,
                   data = d.lit.pass,
                   data_var = "data",
                   inference_opts = list(
                     method = "rejection",
                     samples = 10
                   ),
                   model_var = "literalSemantics",
                   output_format = "samples")
      
      rs.summary <- rs %>%
        # summarize(MAP = estimate_mode(theta),
        #           cred_low = hdi_lower(theta),
        #           cred_upper = hdi_upper(theta)) %>%
        mutate(state = st, utterance = utt)
      
      litSemantics.results <- bind_rows(litSemantics.results,
                                        rs.summary)
      print(utt)
    }
    print(st)
  }

# write.csv(litSemantics.results,paste(local.path,
#                          "model/data/literal_semantics_wNeg_10samples_noMAP.csv",sep=""), # CHANGE FILE NAME AS NEEDED
#           row.names=FALSE)

Load literal semantics BDA results

# litSemantics.results <- read.csv(paste(local.path, "model/data/literal_semantics_wNeg_10000samples.csv",
litSemantics.results <- read.csv(paste(local.path, "model/data/literal_semantics_wNeg_10000samples_noMAP.csv",
# litSemantics.results <- read.csv(paste(local.path, "model/data/literal_semantics_wNeg_10samples_noMAP.csv",
                                       sep = ""))
litSemantics.results.org <- litSemantics.results %>%
    mutate(positivity = factor(as.numeric(grepl("yes", utterance)), 
                             levels = c(0, 1), 
                             labels = c("negative","positive"))) %>%
  mutate(utterance = substring(utterance, 5)) %>%
  mutate(utterance = ordered(utterance, levels = c("terrible", "bad", "okay", "good", "amazing")))

ggplot(data=litSemantics.results.org, 
       aes(x=state, y=MAP, col=utterance, group=utterance)) +
  geom_line() +
  facet_grid(positivity~utterance) +
  geom_errorbar(aes(ymin=cred_low,ymax=cred_upper, width=.1))

production prediction data

d.utterance <- read.csv(paste(
  local.path, 
  "experiment/data_analysis/data/speaker.csv"
  , sep="")
)  %>%
    filter(utterance != "NA_NA")
d <- d.utterance %>%
  separate(utterance, into = c("positivity", "utterance"), sep = "_") %>%
  mutate(true_state = as.factor(true_state),
         goal = as.factor(goal),
         positivity = as.factor(positivity),
         utterance = as.factor(utterance)
          )


ms2 <- d %>%
  filter(!is.na(positivity), !is.na(utterance)) %>% # why is there NA?
  group_by(true_state, goal) %>%
  summarise(n.total=n())

ms3 <- d %>%
  filter(!is.na(positivity), !is.na(utterance)) %>% # why is there NA?
  group_by(true_state, goal, positivity, utterance) %>%
  summarize(n = n())

ms <- left_join(ms2, ms3) %>%
  group_by(true_state, goal, positivity, utterance) %>%
  summarize(mean = n / n.total,
            ci_lower = binom.bayes(n, n.total)$lower,
            ci_upper = binom.bayes(n, n.total)$upper) 

ms_fake <- cbind(expand.grid(true_state=levels(ms$true_state),goal=levels(ms$goal),positivity=levels(ms$positivity), utterance=levels(ms$utterance)), mean=NA, ci_lower=NA, ci_upper=NA)
  

ms.all <- rbind(data.frame(ms), data.frame(ms_fake))
levels(ms.all$true_state) <- c("1 heart", "2 hearts", "3 hearts", "4 hearts", "5 hearts")
levels(ms.all$goal) <- c("want both", "want to be informative", "want to make listener feel good")
levels(ms.all$positivity) <- c("negation", "no negation")

ggplot(data=ms.all, aes(x=positivity, y=mean, fill=utterance)) +
  geom_bar(stat="identity", position=position_dodge()) +
  facet_grid(goal~true_state) +
  xlab("no negation (It was ~) vs negation (It wasn't ~) ") +
  ylab("proportion chosen") +
  # ggtitle("What would the speaker say given their goals?") +
  geom_errorbar(aes(ymin=ci_lower,ymax=ci_upper),position="dodge") +
  geom_hline(yintercept=.1, lty=2) +
  scale_fill_discrete(guide = guide_legend(title = "word"))

Polite RSA: production model

pRSA <- '
var utterances = ["yes_terrible","yes_bad","yes_okay","yes_good","yes_amazing",
                  "not_terrible","not_bad","not_okay","not_good","not_amazing"
                  //                  ,"nullUtt"
                  ];

// var states = ["1","2","3","4","5"];
var states = [1,2,3,4,5];
var statePrior = function(){
  return uniformDraw(states);
};

var isNegation = function(utt){
  return (utt.split("_") == "not")
};

var cost_yes = 1;
var uttCosts = function(cost_neg){
  map(function(u) {return isNegation(u) ? Math.exp(-cost_neg) : Math.exp(-cost_yes)},
        utterances)
}

var utterancePrior = function(cost_neg){
  //  return uniformDraw(utterances)
  return  utterances[discrete(uttCosts(cost_neg))];
};

// model parameters
// var alpha = 1.25;
// var speakerOptimality = 10;

// measured in Experiment 1
var literalSemantics = dataFromR.literalSemantics;
//display(literalSemantics)


var litSemanticsPosteriorObj = _.object(map(function(u){
  return [u, _.object(map(function(s){
    [s, _.pluck(_.where(literalSemantics, {state: s, utterance: u}), "theta")]
//  var lst = _.pluck(_.where(literalSemantics, {state: s, utterance: u}), "theta")
//  [s, lst]  
}, states))]
}, utterances))
//display(litSemanticsPosteriorObj)
// e.g. {"amazing": { 1: [w1, w2, ... , wn], 2: [ ... ], ... }, "terrible": {1: [, ...]} }

var honestyWeights = [1,1,1,1,1]
var kindnessWeights = [1,1,1,1,1]

//var meaning = function(words, state){
//  return flip(literalSemantics[words][state]);
//}; 
//display(meaning("yes_terrible", "1"))

var meaning = function(literalSemantics, words, state){
  return flip(literalSemantics[words][state]);
}; 

var listener0 = cache(function(literalSemantics, utterance) {
  Infer({method: "enumerate"}, function(){
    var state = statePrior();
    var m = meaning(literalSemantics, utterance, state);
    condition(m);
    //display(state)
    return state;
  });

}, 10000);

var speaker1 = cache(function(literalSemantics, exptCondInfo, rsaParameters) {
  Infer({method: "enumerate"}, function(){
    var state = exptCondInfo.state;
    var speakerGoals = exptCondInfo.goalWeights;
    var utterance = utterancePrior(rsaParameters.cost);
    
    var speakerOptimality = rsaParameters.speakerOptimality;
    var alpha = rsaParameters.alpha;
    //display("so = " + speakerOptimality)
    //display("alpha = " + alpha)
    
    var L0 = listener0(literalSemantics, utterance);
    
    var epistemicUtility = L0.score(state);
    var socialUtility = expectation(L0, function(s){return alpha*s});
    
    var eUtility = speakerGoals.honesty*epistemicUtility;
    var sUtility = speakerGoals.kindness*socialUtility;

    var speakerUtility = eUtility+sUtility;

    factor(speakerOptimality*speakerUtility);
    
    //display(utterance)
    return utterance;
  })
}, 10000)

//var listener1 = cache(function(utterance, rsaParameters) {
//  Infer({method: "enumerate"}, function(){
//    var speakerGoals = {
//      honesty: [0.1, 0.3, 0.5, 0.7, 0.9][discrete(honestyWeights)],
//      kindness: [0.1, 0.3, 0.5, 0.7, 0.9][discrete(kindnessWeights)]
//    }
//    
//    var state = statePrior()
//    
//    var S1 = speaker1(state, speakerGoals, rsaParameters)
//    
//    observe(S1, utterance)
//    
//    return {
//      state: state,
//      goals: speakerGoals
//    }
//      })
//}, 10)
//
//var speaker2 = cache(function(exptCondInfo, rsaParameters) {
//  Enumerate(function(){
//    var state = exptCondInfo.state;
//    var intendedGoals = exptCondInfo.goalWeights;
//    var utterance = utterancePrior()
//    
//    var L1 = listener1(utterance, rsaParameters)
//
//    factor(L1.score({"state":state, "goals":intendedGoals}))
//    return utterance
//        display(d)
//    
//  })
//}, 10)

'

Data analysis model

dataAnalysisModel <- '
// foreach helper function
var foreach = function(fn, lst) {
  var foreach_ = function(i) {
    if (i < lst.length) {
      fn(lst[i]);
      foreach_(i + 1);
    }
  };
  foreach_(0);
};
//
  
  var data = dataFromR.data;
  
  var goals = _.uniq(_.pluck(data, "goal"));

  var states = _.uniq(_.pluck(data, "true_state"));
  // var utterances = _.uniq(_.pluck(data, "utterance"));
  
  var dataAnalysis = function(){
    
  var litSemantics = _.object(map(function(u){
    return [u, _.object(map(function(s){
    [s, uniformDraw(litSemanticsPosteriorObj[u][s])]
  }, states))]
}, utterances))

  //display(litSemantics)
      

    var RSAparameters = {
      speakerOptimality: uniformDrift({a: 0, b: 20, width:2}),
      alpha: uniformDrift({a: 0, b: 5, width:0.5}),
      cost: uniformDrift({a: 1, b: 5, width:0.25})
      //literalSemantics: litSemantics
    };

    
    var goalWeightsAndPostPred = map(function(goal){
      
      var goalWeights = {
          honesty: uniformDrift({a: 0, b: 1, width:0.2}), 
          kindness: uniformDrift({a: 0, b: 1, width:0.2})
      //honesty: uniformDraw([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
      //kindness: uniformDraw([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
    }

      var postPred = map(function(state){
        
      var utteranceData = _.pluck(_.where(data, {true_state: state, goal: goal}), "utterance"); 
        
        var exptConditionInfo = {
          state: state, 
          utterance: false,
          goalWeights: goalWeights
        };

        var literalSemantics = litSemantics;
        
        var RSApredictions = speaker1(literalSemantics, exptConditionInfo, RSAparameters);
        //RSApredictions.support()

        // var linkedRSA = linkingFunction(RSApredictions);
        
        
        mapData({data: utteranceData}, 
            function(d){ 
                // display(RSApredictions.score(d))
                //display("d = " + d +  " ___ scr = " + RSApredictions.score(d) )
                observe(RSApredictions, d) 
            });
        // mapData({data: utteranceData}, function(d){ observe(linkedRSA, d) });
        
        var postSupport = RSApredictions.support(); // all utterances in the posterior

        var postPredictive = map(function(u){
          return {
            key: "posteriorPredictive",
            goal: goal,
            state: state,
            utt: u,
            val: Math.exp(RSApredictions.score(u))
          }
        }, postSupport)
        
        return postPredictive
        
      }, states)
      
      
      return [postPred, 
              {key: "weightHonest", goal: goal, state: "NA", utt: "NA", val: goalWeights.honesty},
              {key: "weightKind", goal: goal, state: "NA", utt: "NA", val: goalWeights.kindness}
              // {key: "weightMean", goal: goal, state: "NA", utt: "NA", val: goalWeights.meanness}
              ]
      
    }, goals)

    var returnList = _.flatten([goalWeightsAndPostPred, 
                                //litSemantics,
                                {key: "speakerOptimality", goal: "NA", utt: "NA", state: "NA", val: RSAparameters.speakerOptimality},
                                {key: "alpha", goal: "NA", utt: "NA", state: "NA", val: RSAparameters.alpha},
                                {key: "cost", goal: "NA", utt: "NA", state: "NA", val: RSAparameters.cost}
                                ])
    
    var returnObj = _.object(map(function(i){
      [i.key + "_" + i.goal + "_" + i.state + "_" + i.utt, i.val]
    }, returnList))

    return returnObj
    
  }

    
'

Run Full BDA model

bda.utterance.results <- data.frame()

fullModel <- paste(pRSA, dataAnalysisModel, sep = "\n")

# for (si in sites) {

  # site.data <- filter(d.state, site == si)
  # litSemantics <- filter(litSemantics.results, site == si)
  
  # litSemantics.toPass <- as.list(litSemantics %>%
  # select(state, utterance, MAP) %>%
   # litSemantics.toPass <- as.list(litSemantics.results %>%
   #                                select(state, utterance, MAP) %>%
   #                                 spread(utterance, MAP))
   # litSemantics.toPass <- as.list(litSemantics.results) %>%
   #                                  select(state, utterance, theta) %>%
   #                                  spread(utterance, theta))
     litSemantics.toPass <- litSemantics.results

  dataToWebPPL <- list(literalSemantics = litSemantics.toPass,
                       # data = site.data)
                       data = d.utterance)
  
  # toJSON(as.list(litSemantics.toPass), pretty = T)
  
  # rsa.output <- webppl(pRSA,
  #                    data = dataToWebPPL,
  #                    data_var = "dataFromR")
  # 
  bda.utterance.results <- webppl(fullModel,
                              data = dataToWebPPL,
                              data_var = "dataFromR",
                              inference_opts = list(method = "MCMC", 
                                                    samples = 400,
                                                    burn = 200,
                                                    verbose = TRUE),
                              model_var = "dataAnalysis",
                              output_format = "samples",
                              chains = 2,
                              cores = 2)
  #dataToWebPPL
summary(bda.utterance.results$speakerOptimality_NA_NA_NA)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.5544  0.8661  2.3360  2.6470  4.7080  5.6040
summary(bda.utterance.results$alpha_NA_NA_NA)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.610   1.918   2.703   2.639   3.022   3.887
bda.state.tidy <- bda.utterance.results %>% 
    select(contains("posteriorPredictive")) %>% 
    gather(key, val) %>% 
    mutate(key = gsub("value.", "", key)) %>% # for mht
    separate(key, into = c("param", "goal", "state", "positivity", "utterance")) %>%
  mutate(utterance = factor(utterance, levels = c("terrible", "bad", "okay", "good", "amazing")),
         positivity = factor(positivity, labels = c("neg", "no_neg")), 
         positivity = factor(positivity, levels = c("no_neg", "neg"))) %>%
    group_by(goal, state, positivity, utterance) %>%
  # summarise(val = mean(val))
  summarize(MAP = estimate_mode(val),
            ci_lower = hdi_upper(val),
            ci_upper = hdi_lower(val))
   # multi_boot_standard(column = "val") %>%
  # mutate(val = mean)

ggplot(data=bda.state.tidy, aes(x=positivity, y=MAP, fill=utterance)) +
  geom_bar(stat="identity", position=position_dodge()) +
  facet_grid(goal~state) +
  xlab("no neg (it was ~) vs neg (it wasn't ~) ") +
  ylab("proportion chosen") +
  ggtitle("What would the speaker say given their goals?") +
  geom_errorbar(aes(ymin=ci_lower,ymax=ci_upper),position="dodge") +
  geom_hline(yintercept=.1, lty=2)

bda.state.tidy2 <- bda.utterance.results %>% 
    select(contains("weight")) %>%
    gather(key, val) %>% 
    # mutate(key = gsub("value.", "", key)) %>% # for mht
    separate(key, into = c("param", "goal", "state", "utterance")) %>%
  ggplot(., aes(x = val))+
  geom_histogram(binwidth=0.01)+
  facet_grid(goal~param)
  

# group_by(goal, param) %>%
#   summarize(MAP = estimate_mode(val),
#             cred_upper = hdi_upper(val),
#             cred_lower = hdi_lower(val))

bda.state.tidy2

bda.utterance.results %>% 
  # select(value.speakerOptimality_NA_NA_NA, value.alpha_NA_NA_NA, value.cost_NA_NA_NA) %>%
  select(speakerOptimality_NA_NA_NA, alpha_NA_NA_NA, cost_NA_NA_NA) %>%
    gather(key, val) %>% 
    # mutate(key = gsub("value.", "", key)) %>% # for mht
    separate(key, into = c("param", "goal", "state", "utterance")) %>%
  ggplot(., aes(x = val))+
  geom_histogram()+
  facet_wrap(~param)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.