Ver 8:

some people liked / didn’t like -> likelihood for each number of people 3 conds: utterance, no utterance, smudge

rm(list = ls())
library(jsonlite)
library(ggplot2)
library(tidyr)
library(binom)
source("/Users/ericang/Documents/Research/Politeness/experiment/2_code/data_analysis/helper/useful.R")

raw.data.path <- "/Users/ericang/Documents/Research/Politeness/experiment/2_code/production-results/"

## LOOP TO READ IN FILES
all.data <- data.frame()
files <- dir(raw.data.path,pattern="*.json")

for (file.name in files) {
  
  ## these are the two functions that are most meaningful
  json_file <- readLines(paste(raw.data.path,file.name,sep=""))
  json_file_str = paste(json_file, collapse = "")
  json_file_str = gsub(",}", "}", json_file_str)
  jso = jsonlite::fromJSON(json_file_str)
  jso1 <- data.frame(jso)
  jso1$subid <- substring(file.name, 1, 6)
  
  ## now here's where data get bound together
  all.data <- rbind(all.data, jso1)
}

Filter out participants and clean up.

d <- all.data %>%
  select(subid, answer.valence, answer.utterance, num_range("answer.inferredProb", 0:6)) %>%
  gather(pplNum, prob, num_range("answer.inferredProb", 0:6)) %>%
  mutate(pplNum = substr(pplNum, 20, 20)) %>%
  mutate(valence = answer.valence) %>%
  mutate(utterance = answer.utterance) %>%
  select(subid, valence, utterance, pplNum, prob)
d$prob <- as.numeric(d$prob)
d$pplNum <- as.numeric(d$pplNum)
d$utterance <- factor(d$utterance, levels = c("noUtterance", "partialUtterance", "wholeUtterance"))

## for bootstrapping 95% confidence intervals
theta <- function(x,xdata,na.rm=T) {mean(xdata[x],na.rm=na.rm)}
ci.low <- function(x,na.rm=T) {
  mean(x,na.rm=na.rm) - quantile(bootstrap(1:length(x),1000,theta,x,na.rm=na.rm)$thetastar,.025,na.rm=na.rm)}
ci.high <- function(x,na.rm=T) {
  quantile(bootstrap(1:length(x),1000,theta,x,na.rm=na.rm)$thetastar,.975,na.rm=na.rm) - mean(x,na.rm=na.rm)}


ms <- d %>%
  group_by(valence, utterance, pplNum) %>%
  summarize(
            prob = mean(prob),
           cil = ci.low(prob),
           cih = ci.high(prob))

qplot(pplNum, prob, 
      colour = valence, 
      geom="line",
      data=ms) + 
  facet_wrap(~utterance) +
  xlab("number of people who liked/didn't like") +
  ylab("likelihood that X people liked/didn't like") +
  ggtitle("Valence x Utterance")
## Warning: Removed 1 rows containing missing values (geom_path).

plot of chunk unnamed-chunk-3

lmer <- lmer(prob ~ valence * utterance + (valence | subid), data=d)
summary(lmer)
## Linear mixed model fit by REML ['lmerMod']
## Formula: prob ~ valence * utterance + (valence | subid)
##    Data: d
## 
## REML criterion at convergence: 372.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5646 -0.7175  0.0209  0.7186  2.8510 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  subid    (Intercept) 0.01126  0.106         
##           valencelike 0.00292  0.054    -0.28
##  Residual             0.06273  0.250         
## Number of obs: 2127, groups:  subid, 137
## 
## Fixed effects:
##                                        Estimate Std. Error t value
## (Intercept)                            0.505716   0.019701   25.67
## valencelike                           -0.004006   0.020462   -0.20
## utterancepartialUtterance             -0.010053   0.027020   -0.37
## utterancewholeUtterance               -0.009665   0.027084   -0.36
## valencelike:utterancepartialUtterance  0.016695   0.028658    0.58
## valencelike:utterancewholeUtterance   -0.000736   0.028835   -0.03
## 
## Correlation of Fixed Effects:
##                (Intr) vlnclk uttrncpU uttrncwU vlnclk:ttrncpU
## valencelike    -0.526                                        
## uttrncprtlU    -0.692  0.380                                 
## uttrncwhlUt    -0.696  0.380  0.525                          
## vlnclk:ttrncpU  0.372 -0.708 -0.536   -0.273                 
## vlnclk:ttrncwU  0.371 -0.705 -0.272   -0.538    0.507