polimp ver 6 & 7 analysis

Ver 6:

Question: "some people liked / didn’t like" -> how likely is it that everyone liked / didn’t like?
Response: Likert scale.
3 conditions: utterance, no utterance, smudge.

## Remove every (non-hidden) object from the current environment before
## the analysis starts.
## NOTE(review): this also wipes anything the caller had in the workspace.
rm(list = ls())
library(jsonlite)
library(ggplot2)
library(tidyr)
library(binom)
## NOTE(review): the code below also calls %>%, select(), filter(), str_c(),
## bootstrap() and lmer(), none of which are attached by the library() calls
## above -- presumably this sourced helper script provides them; confirm.
source("/Users/ericang/Documents/Research/Politeness/experiment/2_code/data_analysis/helper/useful.R")

## Directory that is scanned for the per-participant JSON result files.
raw.data.path <- "/Users/ericang/Documents/Research/Politeness/experiment/2_code/production-results/v6/"


## READ IN FILES
## `pattern` is a regular expression, not a shell glob, so match the literal
## ".json" extension at the end of the file name.
files <- dir(raw.data.path, pattern = "\\.json$")

## Parse one result file into a data frame and tag every row with the
## subject id (the first six characters of the file name).
read.subject <- function(file.name) {
  json_file <- readLines(paste0(raw.data.path, file.name))
  json_file_str <- paste(json_file, collapse = "")
  ## drop the comma that directly precedes a closing brace before parsing
  json_file_str <- gsub(",}", "}", json_file_str, fixed = TRUE)
  jso1 <- data.frame(jsonlite::fromJSON(json_file_str))
  jso1$subid <- substring(file.name, 1, 6)
  jso1
}

## Bind all participants in one call instead of growing the data frame
## inside a loop.
all.data <- do.call(rbind, lapply(files, read.subject))
if (is.null(all.data)) {
  ## no files found: keep the empty data frame the loop version started from
  all.data <- data.frame()
}

Filter out training trials and clean up.

## Keep the subject id, the trial label and the response (renamed to
## `judgment`), drop the two training trials, then split the trial label on
## "_" into utterance / inference / context.
d <- all.data %>%
  select(subid, answer.scale, judgment = answer.judgment) %>%
  filter(answer.scale != "training1", answer.scale != "training2") %>%
  separate(answer.scale, into = c("utterance", "inference", "context"), sep = "_")

## inference and context are handled as factors from here on
d[c("inference", "context")] <- lapply(d[c("inference", "context")], as.factor)

## Derive the coded predictors from the raw trial labels:
##   bound       - first three characters of `inference`
##                 ("and" -> lower, "but" -> upper)
##   scale       - "scalar" when the utterance contains "some", else "ad-hoc"
##   utt_valence - "like" when the utterance contains "love" (any case),
##                 else "didnt_like"
##   condition   - valence, inference and context joined with "-"
d1 <- d %>%
  mutate(bound = factor(substring(inference, 1, 3),
                        levels = c("and", "but"),
                        labels = c("lower", "upper")),
         scale = factor(as.numeric(grepl("some", utterance)),
                        levels = c(0, 1),
                        labels = c("ad-hoc", "scalar")),
         utt_valence = factor(as.numeric(grepl("love", utterance,
                                               ignore.case = TRUE)),
                              levels = c(0, 1),
                              labels = c("didnt_like", "like")),
         condition = str_c(utt_valence, "-", inference, "-", context))

## Convert the rating to a number. Going through as.character() first matters
## when `judgment` is a factor: as.numeric() on a factor returns the internal
## level codes rather than the values the participants gave.
d1$judgment <- as.numeric(as.character(d1$judgment))

## for bootstrapping 95% confidence intervals

## Statistic handed to bootstrap(): the mean of `xdata` at the (resampled)
## index vector `x`.
theta <- function(x, xdata, na.rm = TRUE) {
  mean(xdata[x], na.rm = na.rm)
}

## Distance from the sample mean DOWN to the 2.5% quantile of 1000
## bootstrap replicates of the mean (a positive half-width, not a bound).
ci.low <- function(x, na.rm = TRUE) {
  boot.means <- bootstrap(seq_along(x), 1000, theta, x, na.rm = na.rm)$thetastar
  mean(x, na.rm = na.rm) - quantile(boot.means, .025, na.rm = na.rm)
}

## Distance from the sample mean UP to the 97.5% quantile of 1000
## bootstrap replicates of the mean (a positive half-width, not a bound).
ci.high <- function(x, na.rm = TRUE) {
  boot.means <- bootstrap(seq_along(x), 1000, theta, x, na.rm = na.rm)$thetastar
  quantile(boot.means, .975, na.rm = na.rm) - mean(x, na.rm = na.rm)
}


## Mean rating per valence x context cell, with bootstrapped 95% CI
## half-widths:
##   cih - distance from the mean up to the upper bound   (ci.high)
##   cil - distance from the mean down to the lower bound (ci.low)
## The plot uses mean + cih and mean - cil, so each column must come from
## the matching helper.
ms <- d1 %>%
  group_by(utt_valence, context) %>%
  summarize(
            mean = mean(judgment),
            cih = ci.high(judgment),
            cil = ci.low(judgment))
## Relabel the context levels for display, in their existing level order.
## NOTE(review): assumes the three levels sort as no-utterance, smudge,
## utterance -- confirm against levels(d1$context).
levels(ms$context) <- c("no utterance", "smudged word", "utterance")


## Dodged bar chart of the mean rating by valence and context, with the
## confidence interval drawn as a line range on each bar. Built with
## ggplot() and explicit geoms because qplot()'s `stat` and `position`
## arguments are deprecated in current ggplot2.
ggplot(ms, aes(x = utt_valence, y = mean,
               ymax = mean + cih, ymin = mean - cil,
               fill = context)) +
  geom_bar(stat = "identity", position = position_dodge(width = .9)) +
  geom_linerange(position = position_dodge(width = .9)) +
  xlab("Valence") +
  #ylab("Proportion of judgments 'possible that all'") + 
  #ylim(c(0,1)) + 
  ## dashed reference line at a rating of 3
  geom_hline(yintercept = 3, linetype = 2) +
  ggtitle("Utterance x Valence")

plot of chunk unnamed-chunk-3

## Linear mixed model of the rating: valence x context fixed effects, with
## by-subject random intercepts and random slopes for valence and context.
## NOTE(review): the fitted model is stored under the name `lmer`, the same
## name as the fitting function -- easy to misread further down.
lmer <- lmer(judgment ~ utt_valence * context + (utt_valence + context | subid), data=d1)
summary(lmer)

## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## judgment ~ utt_valence * context + (utt_valence + context | subid)
##    Data: d1
## 
## REML criterion at convergence: 1982
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9525 -0.8011  0.0909  0.7213  2.0326 
## 
## Random effects:
##  Groups   Name            Variance Std.Dev. Corr             
##  subid    (Intercept)     0.750    0.866                     
##           utt_valencelike 1.598    1.264    -0.79            
##           contextsmudge   0.239    0.489     0.17 -0.73      
##           contextutt      1.249    1.117    -0.40  0.31 -0.26
##  Residual                 2.858    1.691                     
## Number of obs: 480, groups:  subid, 74
## 
## Fixed effects:
##                               Estimate Std. Error t value
## (Intercept)                     3.8876     0.2148   18.10
## utt_valencelike                 0.2617     0.3058    0.86
## contextsmudge                   0.0943     0.2735    0.34
## contextutt                      0.0743     0.2980    0.25
## utt_valencelike:contextsmudge   0.3500     0.3780    0.93
## utt_valencelike:contextutt     -0.6000     0.3780   -1.59
## 
## Correlation of Fixed Effects:
##                   (Intr) utt_vl cntxts cntxtt utt_vlnclk:cntxts
## utt_valnclk       -0.725                                       
## contextsmdg       -0.592  0.353                                
## contextutt        -0.643  0.458  0.415                         
## utt_vlnclk:cntxts  0.440 -0.618 -0.691 -0.317                  
## utt_vlnclk:cntxtt  0.440 -0.618 -0.346 -0.634  0.500

Ver 7:

Question: "some people liked / didn’t like" -> likely that everyone liked / didn’t like?
Response: possible, yes vs. no (binomial).
3 conditions: utterance, no utterance, smudge.

## Remove every (non-hidden) object from the current environment before
## the analysis starts.
## NOTE(review): this also wipes anything the caller had in the workspace.
rm(list = ls())
library(jsonlite)
library(ggplot2)
library(tidyr)
library(binom)
## NOTE(review): the code below also calls %>%, select(), filter(), str_c()
## and glmer(), none of which are attached by the library() calls above --
## presumably this sourced helper script provides them; confirm.
source("/Users/ericang/Documents/Research/Politeness/experiment/2_code/data_analysis/helper/useful.R")

## Directory that is scanned for the per-participant JSON result files.
raw.data.path <- "/Users/ericang/Documents/Research/Politeness/experiment/2_code/production-results/"

## READ IN FILES
## `pattern` is a regular expression, not a shell glob, so match the literal
## ".json" extension at the end of the file name.
files <- dir(raw.data.path, pattern = "\\.json$")

## Parse one result file into a data frame and tag every row with the
## subject id (the first six characters of the file name).
read.subject <- function(file.name) {
  json_file <- readLines(paste0(raw.data.path, file.name))
  json_file_str <- paste(json_file, collapse = "")
  ## drop the comma that directly precedes a closing brace before parsing
  json_file_str <- gsub(",}", "}", json_file_str, fixed = TRUE)
  jso1 <- data.frame(jsonlite::fromJSON(json_file_str))
  jso1$subid <- substring(file.name, 1, 6)
  jso1
}

## Bind all participants in one call instead of growing the data frame
## inside a loop.
all.data <- do.call(rbind, lapply(files, read.subject))
if (is.null(all.data)) {
  ## no files found: keep the empty data frame the loop version started from
  all.data <- data.frame()
}

Filter out training trials and clean up.

## Keep the subject id, the trial label and the response (renamed to
## `judgment`), drop the two training trials, then split the trial label on
## "_" into utterance / inference / context.
d <- all.data %>%
  select(subid, answer.scale, judgment = answer.judgment) %>%
  filter(answer.scale != "training1", answer.scale != "training2") %>%
  separate(answer.scale, into = c("utterance", "inference", "context"), sep = "_")

## inference and context are handled as factors from here on
d[c("inference", "context")] <- lapply(d[c("inference", "context")], as.factor)

## Derive the coded predictors from the raw trial labels, one per step:
##   bound       - first three characters of `inference`
##                 ("and" -> lower, "but" -> upper)
##   scale       - "scalar" when the utterance contains "some", else "ad-hoc"
##   utt_valence - "like" when the utterance contains "love" (any case),
##                 else "didnt_like"
##   condition   - valence, inference and context joined with "-"
d1 <- d %>%
  mutate(bound = factor(substr(inference, 1, 3),
                        levels = c("and", "but"),
                        labels = c("lower", "upper"))) %>%
  mutate(scale = factor(grepl("some", utterance),
                        levels = c(FALSE, TRUE),
                        labels = c("ad-hoc", "scalar"))) %>%
  mutate(utt_valence = factor(grepl("love", utterance, ignore.case = TRUE),
                              levels = c(FALSE, TRUE),
                              labels = c("didnt_like", "like"))) %>%
  mutate(condition = str_c(utt_valence, inference, context, sep = "-"))

## Proportion of each judgment within every valence x context cell, with a
## Bayesian binomial interval from binom.bayes():
##   n       - number of responses with this judgment in the cell
##   n.total - number of responses in the cell
##   mean    - n / n.total
##   cih     - upper interval bound
##   cil     - lower interval bound
## The plot uses ymax = cih and ymin = cil, so cih must hold the upper bound.
ms <- d1 %>%
  group_by(utt_valence, context) %>%
  mutate(n.total = n()) %>%
  group_by(utt_valence, context, judgment) %>%
  summarize(n = n(),
            n.total = n.total[1],
            mean = n / n.total,
            cih = binom.bayes(n, n.total)$upper,
            cil = binom.bayes(n, n.total)$lower)

## Dodged bar chart of the proportion of "yes" judgments by valence and
## context, with the binomial interval drawn as a line range on each bar.
## Built with ggplot() and explicit geoms because qplot()'s `stat` and
## `position` arguments are deprecated in current ggplot2.
ggplot(subset(ms, judgment == "yes"),
       aes(x = utt_valence, y = mean,
           ymax = cih, ymin = cil,
           fill = context)) +
  geom_bar(stat = "identity", position = position_dodge(width = .9)) +
  geom_linerange(position = position_dodge(width = .9)) +
  xlab("Valence") +
  ylab("Proportion of judgments 'possible that all'") +
  ylim(c(0, 1)) +
  ## dashed reference line at a proportion of .5
  geom_hline(yintercept = .5, linetype = 2) +
  ggtitle("Utterance x Valence")

plot of chunk unnamed-chunk-7

## Logistic mixed model of the yes/no judgment: valence x context fixed
## effects, a by-subject random intercept, and a random intercept plus
## valence slope grouped by context.
## NOTE(review): context enters both as a fixed effect and as a random
## grouping factor with only 3 levels; the summary printed below estimates
## its variances at ~0 -- confirm this term is intended.
## NOTE(review): the fitted model is stored under the name `lmer` although
## it is produced by glmer().
lmer <- glmer(judgment ~ utt_valence * context + (1 | subid) + (utt_valence | context), data=d1, family=binomial)
summary(lmer)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: judgment ~ utt_valence * context + (1 | subid) + (utt_valence |  
##     context)
##    Data: d1
## 
##      AIC      BIC   logLik deviance df.resid 
##    689.1    733.1   -334.5    669.1      590 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.257 -0.672  0.337  0.554  2.915 
## 
## Random effects:
##  Groups  Name            Variance Std.Dev. Corr
##  subid   (Intercept)     1.68e+00 1.297290     
##  context (Intercept)     2.99e-08 0.000173     
##          utt_valencelike 1.97e-08 0.000140 0.51
## Number of obs: 600, groups:  subid, 90; context, 3
## 
## Fixed effects:
##                               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      1.843      0.326    5.65  1.6e-08 ***
## utt_valencelike                 -0.492      0.377   -1.30  0.19284    
## contextsmudge                   -1.258      0.369   -3.41  0.00065 ***
## contextutt                      -1.530      0.370   -4.14  3.5e-05 ***
## utt_valencelike:contextsmudge    1.994      0.541    3.69  0.00022 ***
## utt_valencelike:contextutt      -0.209      0.501   -0.42  0.67679    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                   (Intr) utt_vl cntxts cntxtt utt_vlnclk:cntxts
## utt_valnclk       -0.645                                       
## contextsmdg       -0.690  0.565                                
## contextutt        -0.700  0.566  0.600                         
## utt_vlnclk:cntxts  0.488 -0.706 -0.693 -0.422                  
## utt_vlnclk:cntxtt  0.462 -0.749 -0.414 -0.702  0.519

polimp ver 6 & 7 analysis

EJY, MCF

April 20, 2015