Ver 8:
some people liked / didn’t like -> likelihood for each number of people 3 conds: utterance, no utterance, smudge
rm(list = ls())
library(jsonlite)
library(ggplot2)
library(tidyr)
library(binom)
source("/Users/ericang/Documents/Research/Politeness/experiment/2_code/data_analysis/helper/useful.R")
raw.data.path <- "/Users/ericang/Documents/Research/Politeness/experiment/2_code/production-results/"
## LOOP TO READ IN FILES
all.data <- data.frame()
files <- dir(raw.data.path,pattern="*.json")
for (file.name in files) {
## these are the two functions that are most meaningful
json_file <- readLines(paste(raw.data.path,file.name,sep=""))
json_file_str = paste(json_file, collapse = "")
json_file_str = gsub(",}", "}", json_file_str)
jso = jsonlite::fromJSON(json_file_str)
jso1 <- data.frame(jso)
jso1$subid <- substring(file.name, 1, 6)
## now here's where data get bound together
all.data <- rbind(all.data, jso1)
}
Filter out participants and clean up.
d <- all.data %>%
select(subid, answer.valence, answer.utterance, num_range("answer.inferredProb", 0:6)) %>%
gather(pplNum, prob, num_range("answer.inferredProb", 0:6)) %>%
mutate(pplNum = substr(pplNum, 20, 20)) %>%
mutate(valence = answer.valence) %>%
mutate(utterance = answer.utterance) %>%
select(subid, valence, utterance, pplNum, prob)
d$prob <- as.numeric(d$prob)
d$pplNum <- as.numeric(d$pplNum)
d$utterance <- factor(d$utterance, levels = c("noUtterance", "partialUtterance", "wholeUtterance"))
## for bootstrapping 95% confidence intervals
theta <- function(x,xdata,na.rm=T) {mean(xdata[x],na.rm=na.rm)}
ci.low <- function(x,na.rm=T) {
mean(x,na.rm=na.rm) - quantile(bootstrap(1:length(x),1000,theta,x,na.rm=na.rm)$thetastar,.025,na.rm=na.rm)}
ci.high <- function(x,na.rm=T) {
quantile(bootstrap(1:length(x),1000,theta,x,na.rm=na.rm)$thetastar,.975,na.rm=na.rm) - mean(x,na.rm=na.rm)}
ms <- d %>%
group_by(valence, utterance, pplNum) %>%
summarize(
prob = mean(prob),
cil = ci.low(prob),
cih = ci.high(prob))
qplot(pplNum, prob,
colour = valence,
geom="line",
data=ms) +
facet_wrap(~utterance) +
xlab("number of people who liked/didn't like") +
ylab("likelihood that X people liked/didn't like") +
ggtitle("Valence x Utterance")
## Warning: Removed 1 rows containing missing values (geom_path).
lmer <- lmer(prob ~ valence * utterance + (valence | subid), data=d)
summary(lmer)
## Linear mixed model fit by REML ['lmerMod']
## Formula: prob ~ valence * utterance + (valence | subid)
## Data: d
##
## REML criterion at convergence: 372.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5646 -0.7175 0.0209 0.7186 2.8510
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subid (Intercept) 0.01126 0.106
## valencelike 0.00292 0.054 -0.28
## Residual 0.06273 0.250
## Number of obs: 2127, groups: subid, 137
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.505716 0.019701 25.67
## valencelike -0.004006 0.020462 -0.20
## utterancepartialUtterance -0.010053 0.027020 -0.37
## utterancewholeUtterance -0.009665 0.027084 -0.36
## valencelike:utterancepartialUtterance 0.016695 0.028658 0.58
## valencelike:utterancewholeUtterance -0.000736 0.028835 -0.03
##
## Correlation of Fixed Effects:
## (Intr) vlnclk uttrncpU uttrncwU vlnclk:ttrncpU
## valencelike -0.526
## uttrncprtlU -0.692 0.380
## uttrncwhlUt -0.696 0.380 0.525
## vlnclk:ttrncpU 0.372 -0.708 -0.536 -0.273
## vlnclk:ttrncwU 0.371 -0.705 -0.272 -0.538 0.507