polimp ver 8 analysis

Ver 8:

“Some people liked / didn’t like” -> likelihood for each number of people. 3 conditions: utterance, no utterance, smudge.

rm(list = ls())
library(jsonlite)
library(ggplot2)
library(tidyr)
library(binom)
source("/Users/ericang/Documents/Research/Politeness/experiment/2_code/data_analysis/helper/useful.R")

## LOAD RAW DATA
## Both experiment versions store one JSON file per participant and are read
## the same way, so one reader is shared instead of two copy-pasted loops.

# List the JSON result files in `path` (file names only, no directory part).
# `pattern` is a regular expression, not a shell glob: the former "*.json"
# also matched names such as "x.json.bak", so anchor on the extension instead.
list_json_files <- function(path) {
  dir(path, pattern = "\\.json$")
}

# Read every JSON result file in `path` into a single data frame.
#   path: directory holding the files; it must end in "/" because file names
#         are appended to it directly.
#   expt: label written to the `expt` column of every row.
# Returns the row-bound data of all files, with `subid` (first six characters
# of the file name) and `expt` added; an empty data frame if no file is found.
read_expt_data <- function(path, expt) {
  per_file <- lapply(list_json_files(path), function(file.name) {
    json_file <- readLines(paste0(path, file.name))
    json_file_str <- paste(json_file, collapse = "")
    # drop a comma that directly precedes a closing brace
    # (a trailing comma is not valid JSON)
    json_file_str <- gsub(",}", "}", json_file_str, fixed = TRUE)
    jso1 <- data.frame(jsonlite::fromJSON(json_file_str))
    jso1$subid <- substring(file.name, 1, 6)
    jso1$expt <- expt
    jso1
  })
  if (length(per_file) == 0) {
    return(data.frame())
  }
  # bind once at the end rather than growing a data frame inside a loop
  do.call(rbind, per_file)
}

raw.data.path <- "/Users/ericang/Documents/Research/Politeness/experiment/2_code/production-results/v8/"
all.data1 <- read_expt_data(raw.data.path, "first")

raw.data.path <- "/Users/ericang/Documents/Research/Politeness/experiment/2_code/production-results/v10/"
all.data <- read_expt_data(raw.data.path, "second")

## both experiments stacked; `expt` tells them apart
all.data2 <- rbind(all.data1, all.data)

Filter out participants and clean up.

# The experiment code was messed up for some participants: they are identified
# by having a "first"-experiment trial with a partial utterance, "like" valence
# and the recipe domain.
filtered <- all.data2 %>%
  filter(
    expt == "first",
    answer.utterance == "partialUtterance",
    answer.valence == "like",
    answer.domain == "recipe"
  )

# Remove every row belonging to one of those participants.
all.data2 <- all.data2[!(all.data2$subid %in% filtered$subid), ]

# Long format: one row per participant x trial x number of people (0-6).
# The columns answer.inferredProb0 ... answer.inferredProb6 are stacked into
# `prob`; the digit at position 20 of the column name becomes `pplNum`.
d <- all.data2 %>%
  select(
    subid,
    answer.order,
    valence = answer.valence,
    utterance = answer.utterance,
    num_range("answer.inferredProb", 0:6)
  ) %>%
  gather(pplNum, prob, num_range("answer.inferredProb", 0:6)) %>%
  mutate(
    pplNum = as.numeric(substr(pplNum, 20, 20)),
    prob = as.numeric(prob),
    utterance = factor(
      utterance,
      levels = c("noUtterance", "partialUtterance", "wholeUtterance")
    )
  )

## for bootstrapping 95% confidence intervals

# Statistic handed to bootstrap(): the mean of `xdata` at the resampled
# indices `x`.
theta <- function(x, xdata, na.rm = TRUE) {
  mean(xdata[x], na.rm = na.rm)
}

# Distance from the mean of `x` down to the 2.5% quantile of 1000 bootstrapped
# means, i.e. the length of the lower error bar.
# NOTE(review): bootstrap() is not defined in this file; presumably it is
# bootstrap::bootstrap, made available by the sourced helper - confirm.
ci.low <- function(x, na.rm = TRUE) {
  boot_means <- bootstrap(seq_along(x), 1000, theta, x, na.rm = na.rm)$thetastar
  mean(x, na.rm = na.rm) - quantile(boot_means, .025, na.rm = na.rm)
}

# Distance from the mean of `x` up to the 97.5% quantile of 1000 bootstrapped
# means, i.e. the length of the upper error bar.
ci.high <- function(x, na.rm = TRUE) {
  boot_means <- bootstrap(seq_along(x), 1000, theta, x, na.rm = na.rm)$thetastar
  quantile(boot_means, .975, na.rm = na.rm) - mean(x, na.rm = na.rm)
}

# Average within participant first, so that every participant contributes a
# single value to each valence x utterance x pplNum cell.
mss <- d %>%
  group_by(valence, utterance, pplNum, subid) %>%
  summarize(prob = mean(prob, na.rm = TRUE)) %>%
  ungroup()  # do not leave a grouped data frame behind

# Cell means across participants, plus the lengths of the upper (cih) and
# lower (cil) bootstrapped confidence-interval arms. All three aggregate()
# calls use the same formula and data, so their rows line up.
ms <- aggregate(prob ~ valence + utterance + pplNum, mss, mean)
ms$cih <- aggregate(prob ~ valence + utterance + pplNum, mss, ci.high)$prob
ms$cil <- aggregate(prob ~ valence + utterance + pplNum, mss, ci.low)$prob

# One panel per utterance condition, one line per valence.
# Missing means are dropped with is.na(); comparing against the string "NA"
# only worked through implicit coercion.
ggplot(
  subset(ms, !is.na(prob)),
  aes(x = pplNum, y = prob, colour = valence)
) +
  geom_line() +
  facet_wrap(~utterance) +
  xlab("number of people who liked/didn't like") +
  ylab("likelihood that X people liked/didn't like") +
  ggtitle("Valence x Utterance") +
  # width is a fixed setting, not a data mapping, so it stays outside aes()
  geom_errorbar(aes(ymin = prob - cil, ymax = prob + cih), width = .1)

plot of chunk unnamed-chunk-3

# order effect?
# Same summary as for the main plot, additionally split by `answer.order`.
# Average within participant first, so that every participant contributes a
# single value to each cell.
mss <- d %>%
  group_by(valence, utterance, pplNum, answer.order, subid) %>%
  summarize(prob = mean(prob, na.rm = TRUE)) %>%
  ungroup()  # do not leave a grouped data frame behind

# Cell means across participants, plus the lengths of the upper (cih) and
# lower (cil) bootstrapped confidence-interval arms. All three aggregate()
# calls use the same formula and data, so their rows line up.
ms <- aggregate(prob ~ valence + utterance + pplNum + answer.order, mss, mean)
ms$cih <- aggregate(prob ~ valence + utterance + pplNum + answer.order, mss, ci.high)$prob
ms$cil <- aggregate(prob ~ valence + utterance + pplNum + answer.order, mss, ci.low)$prob

# Rows of panels: answer.order; columns: utterance condition; lines: valence.
# Missing means are dropped with is.na(); comparing against the string "NA"
# only worked through implicit coercion.
ggplot(
  subset(ms, !is.na(prob)),
  aes(x = pplNum, y = prob, colour = valence)
) +
  geom_line() +
  facet_grid(answer.order ~ utterance) +
  xlab("number of people who liked/didn't like") +
  ylab("likelihood that X people liked/didn't like") +
  ggtitle("Valence x Utterance") +
  # width is a fixed setting, not a data mapping, so it stays outside aes()
  geom_errorbar(aes(ymin = prob - cil, ymax = prob + cih), width = .1)

plot of chunk unnamed-chunk-3

# Linear mixed model of the ratings: fixed effects of valence, utterance and
# their interaction; by-participant random intercept and random valence slope.
# NOTE(review): the fitted model is stored under the name `lmer`, the same
# name as the fitting function; the name is kept so existing references to it
# keep working.
lmer <- lmer(
  formula = prob ~ valence * utterance + (valence | subid),
  data = d
)
summary(lmer)

## Linear mixed model fit by REML ['lmerMod']
## Formula: prob ~ valence * utterance + (valence | subid)
##    Data: d
## 
## REML criterion at convergence: 528.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6009 -0.7292  0.0535  0.6836  2.7449 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  subid    (Intercept) 0.00992  0.0996        
##           valencelike 0.00799  0.0894   -0.37
##  Residual             0.06320  0.2514        
## Number of obs: 2868, groups:  subid, 180
## 
## Fixed effects:
##                                       Estimate Std. Error t value
## (Intercept)                            0.50535    0.01583    31.9
## valencelike                           -0.00692    0.01898    -0.4
## utterancepartialUtterance             -0.00260    0.02253    -0.1
## utterancewholeUtterance               -0.03885    0.02156    -1.8
## valencelike:utterancepartialUtterance  0.05486    0.02768     2.0
## valencelike:utterancewholeUtterance    0.02417    0.02646     0.9
## 
## Correlation of Fixed Effects:
##                (Intr) vlnclk uttrncpU uttrncwU vlnclk:ttrncpU
## valencelike    -0.573                                        
## uttrncprtlU    -0.646  0.392                                 
## uttrncwhlUt    -0.676  0.410  0.464                          
## vlnclk:ttrncpU  0.383 -0.662 -0.592   -0.279                 
## vlnclk:ttrncwU  0.401 -0.693 -0.279   -0.591    0.470

polimp ver 8 analysis

EJY, MCF

May 8, 2015