Ver 6:
some people liked / didn’t like -> likely that everyone liked/didn’t like?
Likert scale
3 conditions: utterance, no utterance, smudge
## Start from an empty workspace.
## NOTE(review): rm(list = ls()) deletes every object in the calling
## environment, including anything the user had defined before running this.
rm(list = ls())
## Packages attached directly by this script.
library(jsonlite)
library(ggplot2)
library(tidyr)
library(binom)
## Shared helper script.
## NOTE(review): %>%, select(), str_c(), bootstrap() and lmer() are used below
## without a library() call here, so presumably this helper attaches the
## packages that provide them -- confirm.
source("/Users/ericang/Documents/Research/Politeness/experiment/2_code/data_analysis/helper/useful.R")
## Directory containing the version 6 JSON result files. File names are
## appended directly to this string below, so the trailing "/" is required.
raw.data.path <- "/Users/ericang/Documents/Research/Politeness/experiment/2_code/production-results/v6/"
## READ IN FILES
## Read every JSON result file in raw.data.path and stack them into all.data.
## dir() takes a regular expression, not a glob, so the extension is matched
## with "\\.json$" rather than "*.json".
files <- dir(raw.data.path, pattern = "\\.json$")
per.subject <- lapply(files, function(file.name) {
  ## Collapse the file into one string and turn every ",}" into "}" before
  ## parsing (presumably the files contain a trailing comma before a closing
  ## brace, which strict JSON does not allow -- confirm).
  json_file_str <- paste(readLines(paste0(raw.data.path, file.name)),
                         collapse = "")
  json_file_str <- gsub(",}", "}", json_file_str, fixed = TRUE)
  jso1 <- data.frame(jsonlite::fromJSON(json_file_str))
  ## The subject id is the first six characters of the file name.
  jso1$subid <- substring(file.name, 1, 6)
  jso1
})
## Bind all subjects in one call instead of growing the data frame inside a
## loop. Seeding the list with an empty data frame keeps all.data a data.frame
## even when no files were found.
all.data <- do.call(rbind, c(list(data.frame()), per.subject))
Filter out training trials and clean up.
## Trial-level data: keep the subject id, the trial label and the response
## (renamed to judgment), drop the two training trials, and split the trial
## label on "_" into utterance, inference and context. The latter two are
## stored as factors.
d <- all.data %>%
  select(subid, answer.scale, judgment = answer.judgment) %>%
  filter(answer.scale != "training1", answer.scale != "training2") %>%
  separate(answer.scale,
           into = c("utterance", "inference", "context"),
           sep = "_") %>%
  mutate(inference = as.factor(inference),
         context = as.factor(context))
## Derived condition variables:
##   bound       - "lower"/"upper" when inference starts with "and"/"but"
##   scale       - "scalar" when the utterance contains "some", else "ad-hoc"
##   utt_valence - "like" when the utterance contains "love" (any case),
##                 else "didnt_like"
##   condition   - valence, inference and context joined by "-"
d1 <- d %>%
  mutate(
    bound = factor(substring(inference, 1, 3),
                   levels = c("and", "but"),
                   labels = c("lower", "upper")),
    scale = factor(grepl("some", utterance),
                   levels = c(FALSE, TRUE),
                   labels = c("ad-hoc", "scalar")),
    utt_valence = factor(grepl("love", utterance, ignore.case = TRUE),
                         levels = c(FALSE, TRUE),
                         labels = c("didnt_like", "like")),
    condition = str_c(utt_valence, inference, context, sep = "-")
  )
## Convert the response to numbers. Going through as.character() matters when
## judgment is a factor: as.numeric() on a factor returns the internal level
## codes, which differ from the recorded ratings whenever a rating value is
## absent from the data.
d1$judgment <- as.numeric(as.character(d1$judgment))
## for bootstrapping 95% confidence intervals

## Statistic passed to bootstrap(): the mean of xdata at the (resampled)
## index positions x.
theta <- function(x, xdata, na.rm = TRUE) {
  mean(xdata[x], na.rm = na.rm)
}

## Distance from the sample mean down to the 2.5% quantile of 1000
## bootstrapped means of x.
ci.low <- function(x, na.rm = TRUE) {
  boot.means <- bootstrap(seq_along(x), 1000, theta, x,
                          na.rm = na.rm)$thetastar
  mean(x, na.rm = na.rm) - quantile(boot.means, .025, na.rm = na.rm)
}

## Distance from the sample mean up to the 97.5% quantile of 1000
## bootstrapped means of x.
ci.high <- function(x, na.rm = TRUE) {
  boot.means <- bootstrap(seq_along(x), 1000, theta, x,
                          na.rm = na.rm)$thetastar
  quantile(boot.means, .975, na.rm = na.rm) - mean(x, na.rm = na.rm)
}
## Mean judgment per valence x context cell with bootstrapped CI half-widths.
## cih is the distance from the mean up to the upper bound (ci.high) and cil
## the distance down to the lower bound (ci.low). The plot draws
## mean + cih / mean - cil, so assigning them the other way round mirrored
## the error bars whenever the bootstrap interval was asymmetric.
## NAs are dropped from the mean to match the CI helpers, which default to
## na.rm = TRUE.
ms <- d1 %>%
  group_by(utt_valence, context) %>%
  summarize(
    mean = mean(judgment, na.rm = TRUE),
    cih = ci.high(judgment),
    cil = ci.low(judgment)) %>%
  ungroup()
## Relabel the context levels for display.
## NOTE(review): labels are assigned by position, so this assumes the existing
## level order of context is (no utterance, smudge, utterance) -- confirm.
levels(ms$context) <- c("no utterance", "smudged word", "utterance")
## Dodged bars of the cell means with error bars from mean - cil to
## mean + cih. Written with ggplot() and explicit layers because qplot()'s
## `stat` and `position` arguments have been deprecated since ggplot2 2.0.0,
## so the identity stat and dodging must be set on the layers themselves.
ggplot(ms, aes(x = utt_valence, y = mean,
               ymax = mean + cih, ymin = mean - cil,
               fill = context)) +
  geom_bar(stat = "identity", position = position_dodge(width = .9)) +
  geom_linerange(position = position_dodge(width = .9)) +
  xlab("Valence") +
  #ylab("Proportion of judgments 'possible that all'") +
  #ylim(c(0,1)) +
  ## dashed horizontal reference line at a judgment of 3
  geom_hline(yintercept = 3, linetype = 2) +
  ggtitle("Utterance x Valence")
## Linear mixed model of judgment on valence, context and their interaction,
## with by-subject random intercepts and random slopes for valence and
## context. The fit is stored under the name `lmer`; calls to lmer() still
## resolve to the function because R skips non-function objects when looking
## up a function call.
lmer <- lmer(
  judgment ~ utt_valence * context + (utt_valence + context | subid),
  data = d1
)
summary(lmer)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## judgment ~ utt_valence * context + (utt_valence + context | subid)
## Data: d1
##
## REML criterion at convergence: 1982
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9525 -0.8011 0.0909 0.7213 2.0326
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subid (Intercept) 0.750 0.866
## utt_valencelike 1.598 1.264 -0.79
## contextsmudge 0.239 0.489 0.17 -0.73
## contextutt 1.249 1.117 -0.40 0.31 -0.26
## Residual 2.858 1.691
## Number of obs: 480, groups: subid, 74
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.8876 0.2148 18.10
## utt_valencelike 0.2617 0.3058 0.86
## contextsmudge 0.0943 0.2735 0.34
## contextutt 0.0743 0.2980 0.25
## utt_valencelike:contextsmudge 0.3500 0.3780 0.93
## utt_valencelike:contextutt -0.6000 0.3780 -1.59
##
## Correlation of Fixed Effects:
## (Intr) utt_vl cntxts cntxtt utt_vlnclk:cntxts
## utt_valnclk -0.725
## contextsmdg -0.592 0.353
## contextutt -0.643 0.458 0.415
## utt_vlnclk:cntxts 0.440 -0.618 -0.691 -0.317
## utt_vlnclk:cntxtt 0.440 -0.618 -0.346 -0.634 0.500
Ver 7:
some people liked / didn’t like -> likely that everyone liked/didn’t like?
possible: yes vs no (binomial)
3 conditions: utterance, no utterance, smudge
## Start the version 7 analysis from an empty workspace; this also discards
## every object created earlier in this file.
## NOTE(review): rm(list = ls()) deletes every object in the calling
## environment, including anything the user had defined before running this.
rm(list = ls())
## Packages attached directly by this script.
library(jsonlite)
library(ggplot2)
library(tidyr)
library(binom)
## Shared helper script.
## NOTE(review): %>%, select(), str_c() and glmer() are used below without a
## library() call here, so presumably this helper attaches the packages that
## provide them -- confirm.
source("/Users/ericang/Documents/Research/Politeness/experiment/2_code/data_analysis/helper/useful.R")
## Directory containing the JSON result files for this version. File names
## are appended directly to this string below, so the trailing "/" is required.
## NOTE(review): this is the top-level production-results/ folder, with no
## version sub-directory; dir() is non-recursive by default, so only JSON
## files directly in this folder are read -- confirm it holds only the
## version 7 results.
raw.data.path <- "/Users/ericang/Documents/Research/Politeness/experiment/2_code/production-results/"
## READ IN FILES
## Read every JSON result file in raw.data.path and stack them into all.data.
## dir() takes a regular expression, not a glob, so the extension is matched
## with "\\.json$" rather than "*.json".
files <- dir(raw.data.path, pattern = "\\.json$")
per.subject <- lapply(files, function(file.name) {
  ## Collapse the file into one string and turn every ",}" into "}" before
  ## parsing (presumably the files contain a trailing comma before a closing
  ## brace, which strict JSON does not allow -- confirm).
  json_file_str <- paste(readLines(paste0(raw.data.path, file.name)),
                         collapse = "")
  json_file_str <- gsub(",}", "}", json_file_str, fixed = TRUE)
  jso1 <- data.frame(jsonlite::fromJSON(json_file_str))
  ## The subject id is the first six characters of the file name.
  jso1$subid <- substring(file.name, 1, 6)
  jso1
})
## Bind all subjects in one call instead of growing the data frame inside a
## loop. Seeding the list with an empty data frame keeps all.data a data.frame
## even when no files were found.
all.data <- do.call(rbind, c(list(data.frame()), per.subject))
Filter out training trials and clean up.
## Trial-level data: keep the subject id, the trial label and the response
## (renamed to judgment), drop the two training trials, and split the trial
## label on "_" into utterance, inference and context. The latter two are
## stored as factors.
d <- all.data %>%
  select(subid, answer.scale, judgment = answer.judgment) %>%
  filter(answer.scale != "training1", answer.scale != "training2") %>%
  separate(answer.scale,
           into = c("utterance", "inference", "context"),
           sep = "_") %>%
  mutate(inference = as.factor(inference),
         context = as.factor(context))
## Derived condition variables:
##   bound       - "lower"/"upper" when inference starts with "and"/"but"
##   scale       - "scalar" when the utterance contains "some", else "ad-hoc"
##   utt_valence - "like" when the utterance contains "love" (any case),
##                 else "didnt_like"
##   condition   - valence, inference and context joined by "-"
d1 <- d %>%
  mutate(
    bound = factor(substring(inference, 1, 3),
                   levels = c("and", "but"),
                   labels = c("lower", "upper")),
    scale = factor(grepl("some", utterance),
                   levels = c(FALSE, TRUE),
                   labels = c("ad-hoc", "scalar")),
    utt_valence = factor(grepl("love", utterance, ignore.case = TRUE),
                         levels = c(FALSE, TRUE),
                         labels = c("didnt_like", "like")),
    condition = str_c(utt_valence, inference, context, sep = "-")
  )
## Proportion of each judgment value within every valence x context cell,
## with a binom.bayes() interval on that proportion.
##   n.total - number of trials in the valence x context cell
##   n       - number of those trials with the given judgment
##   mean    - n / n.total
## cih holds the upper bound and cil the lower bound. They were previously
## assigned the other way round, which handed the plot ymax < ymin and made
## the column names misleading.
ms <- d1 %>%
  group_by(utt_valence, context) %>%
  mutate(n.total = n()) %>%
  group_by(utt_valence, context, judgment) %>%
  summarize(n = n(),
            n.total = n.total[1],
            mean = n / n.total,
            cih = binom.bayes(n, n.total)$upper,
            cil = binom.bayes(n, n.total)$lower) %>%
  ungroup()
## Dodged bars of the proportion of "yes" judgments per valence x context
## cell, with a line range between cil and cih. Written with ggplot() and
## explicit layers because qplot()'s `stat` and `position` arguments have
## been deprecated since ggplot2 2.0.0, so the identity stat and dodging must
## be set on the layers themselves.
ggplot(subset(ms, judgment == "yes"),
       aes(x = utt_valence, y = mean,
           ymax = cih, ymin = cil,
           fill = context)) +
  geom_bar(stat = "identity", position = position_dodge(width = .9)) +
  geom_linerange(position = position_dodge(width = .9)) +
  xlab("Valence") +
  ylab("Proportion of judgments 'possible that all'") +
  ylim(c(0, 1)) +
  ## dashed horizontal reference line at a proportion of .5
  geom_hline(yintercept = .5, linetype = 2) +
  ggtitle("Utterance x Valence")
## Logistic mixed model of the yes/no judgment on valence, context and their
## interaction, with by-subject random intercepts and a by-context random
## intercept and valence slope.
## NOTE(review): context enters both as a fixed effect and as a grouping
## factor with only 3 levels, and the summary output reports variances of
## about 1e-08 for that term -- consider whether (utt_valence | context)
## should be in the model at all.
lmer <- glmer(
  judgment ~ utt_valence * context + (1 | subid) + (utt_valence | context),
  data = d1,
  family = binomial
)
summary(lmer)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: judgment ~ utt_valence * context + (1 | subid) + (utt_valence |
## context)
## Data: d1
##
## AIC BIC logLik deviance df.resid
## 689.1 733.1 -334.5 669.1 590
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.257 -0.672 0.337 0.554 2.915
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subid (Intercept) 1.68e+00 1.297290
## context (Intercept) 2.99e-08 0.000173
## utt_valencelike 1.97e-08 0.000140 0.51
## Number of obs: 600, groups: subid, 90; context, 3
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.843 0.326 5.65 1.6e-08 ***
## utt_valencelike -0.492 0.377 -1.30 0.19284
## contextsmudge -1.258 0.369 -3.41 0.00065 ***
## contextutt -1.530 0.370 -4.14 3.5e-05 ***
## utt_valencelike:contextsmudge 1.994 0.541 3.69 0.00022 ***
## utt_valencelike:contextutt -0.209 0.501 -0.42 0.67679
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) utt_vl cntxts cntxtt utt_vlnclk:cntxts
## utt_valnclk -0.645
## contextsmdg -0.690 0.565
## contextutt -0.700 0.566 0.600
## utt_vlnclk:cntxts 0.488 -0.706 -0.693 -0.422
## utt_vlnclk:cntxtt 0.462 -0.749 -0.414 -0.702 0.519