## Ver. 8:
## "Some people liked / didn’t like" -> likelihood for each number of people.
## 3 conditions: utterance, no utterance, smudge.
rm(list = ls())
library(jsonlite)
library(ggplot2)
library(tidyr)
library(binom)
source("/Users/ericang/Documents/Research/Politeness/experiment/2_code/data_analysis/helper/useful.R")
## Read every JSON result file found in `path` and stack the rows into one
## data frame.
##
##   path  directory holding the result files; it must end in "/" because file
##         names are appended to it directly
##   expt  label written to the `expt` column of every returned row
##
## Returns a data frame holding the rows of all files plus two added columns:
## `subid` (the first six characters of the file name) and `expt`. When `path`
## contains no file ending in ".json" an empty data frame is returned.
read.expt.data <- function(path, expt) {
  ## `pattern` is a regular expression, not a shell glob, so anchor on the
  ## extension instead of writing "*.json".
  files <- dir(path, pattern = "\\.json$")
  per.file <- lapply(files, function(file.name) {
    json_file_str <- paste(readLines(paste0(path, file.name)), collapse = "")
    ## remove the comma left in front of a closing brace before parsing
    json_file_str <- gsub(",}", "}", json_file_str, fixed = TRUE)
    jso1 <- data.frame(jsonlite::fromJSON(json_file_str))
    jso1$subid <- substring(file.name, 1, 6)
    jso1$expt <- expt
    jso1
  })
  ## bind once at the end rather than growing a data frame inside a loop
  if (length(per.file) == 0) {
    data.frame()
  } else {
    do.call(rbind, per.file)
  }
}

## first experiment (v8 results)
raw.data.path <- "/Users/ericang/Documents/Research/Politeness/experiment/2_code/production-results/v8/"
all.data1 <- read.expt.data(raw.data.path, "first")

## second experiment (v10 results)
raw.data.path <- "/Users/ericang/Documents/Research/Politeness/experiment/2_code/production-results/v10/"
all.data <- read.expt.data(raw.data.path, "second")

## both experiments stacked; `expt` tells the rows apart
all.data2 <- rbind(all.data1, all.data)
## Filter out participants and clean up.
## The experiment code was messed up for some participants of the first
## experiment. They are identified by having a partialUtterance / like / recipe
## row, and every row carrying one of their subids is removed.
filtered <- all.data2 %>%
  filter(expt == "first",
         answer.utterance == "partialUtterance",
         answer.valence == "like",
         answer.domain == "recipe")
all.data2 <- all.data2[is.na(match(all.data2$subid, filtered$subid)), ]

## Reshape to long format: each answer.inferredProb0 ... answer.inferredProb6
## column becomes its own row, keyed by pplNum.
d <- all.data2 %>%
  select(subid, answer.order,
         valence = answer.valence,
         utterance = answer.utterance,
         num_range("answer.inferredProb", 0:6)) %>%
  gather(pplNum, prob, num_range("answer.inferredProb", 0:6)) %>%
  mutate(
    ## the digit is the 20th character of "answer.inferredProb<digit>"
    pplNum = as.numeric(substr(pplNum, 20, 20)),
    prob = as.numeric(prob),
    utterance = factor(utterance,
                       levels = c("noUtterance", "partialUtterance", "wholeUtterance"))
  )
## for bootstrapping 95% confidence intervals

## Statistic evaluated on each resample: the mean of the selected observations.
##   x      index vector choosing which elements of `xdata` enter the mean
##   xdata  vector of observations
##   na.rm  drop NA values before averaging? (TRUE by default)
## Returns a single number.
theta <- function(x, xdata, na.rm = TRUE) {
  mean(xdata[x], na.rm = na.rm)
}
## Lower half-width of a bootstrapped 95% interval around the mean of `x`:
## mean(x) minus the 2.5% quantile of 1000 bootstrapped means.
##   x      numeric vector of observations
##   na.rm  drop NA values? (TRUE by default)
## Returns the value produced by quantile(), i.e. a length-one named numeric.
## NOTE(review): bootstrap() is not defined or loaded in the visible part of
## this file; presumably it comes from the `bootstrap` package via the sourced
## helper script. Confirm.
ci.low <- function(x, na.rm = TRUE) {
  ## seq_along() instead of 1:length(x) so that an empty `x` does not turn
  ## into the index vector c(1, 0)
  boot.means <- bootstrap(seq_along(x), 1000, theta, x, na.rm = na.rm)$thetastar
  mean(x, na.rm = na.rm) - quantile(boot.means, .025, na.rm = na.rm)
}
## Upper half-width of a bootstrapped 95% interval around the mean of `x`:
## the 97.5% quantile of 1000 bootstrapped means minus mean(x).
##   x      numeric vector of observations
##   na.rm  drop NA values? (TRUE by default)
## Returns the value produced by quantile(), i.e. a length-one named numeric.
## NOTE(review): bootstrap() is not defined or loaded in the visible part of
## this file; presumably it comes from the `bootstrap` package via the sourced
## helper script. Confirm.
ci.high <- function(x, na.rm = TRUE) {
  ## seq_along() instead of 1:length(x) so that an empty `x` does not turn
  ## into the index vector c(1, 0)
  boot.means <- bootstrap(seq_along(x), 1000, theta, x, na.rm = na.rm)$thetastar
  quantile(boot.means, .975, na.rm = na.rm) - mean(x, na.rm = na.rm)
}
## Average prob within each subid for every valence x utterance x pplNum cell,
## so the statistics below are computed over per-participant means.
mss <- d %>%
  group_by(valence, utterance, pplNum, subid) %>%
  summarize(
    prob = mean(prob, na.rm = TRUE)
  )

## Cell means plus bootstrapped CI half-widths (cih above, cil below the mean).
## The three aggregate() calls use the same formula and data, so their rows come
## back in the same order and the columns can be attached directly.
ms <- aggregate(prob ~ valence + utterance + pplNum, mss, mean)
ms$cih <- aggregate(prob ~ valence + utterance + pplNum, mss, ci.high)$prob
ms$cil <- aggregate(prob ~ valence + utterance + pplNum, mss, ci.low)$prob

## One panel per utterance, one line per valence, error bars from cil/cih.
## Missing means are dropped with is.na() rather than by comparing the numeric
## column with the string "NA"; ggplot() replaces the deprecated qplot().
ggplot(subset(ms, !is.na(prob)),
       aes(x = pplNum, y = prob, colour = valence)) +
  geom_line() +
  facet_wrap(~utterance) +
  xlab("number of people who liked/didn't like") +
  ylab("likelihood that X people liked/didn't like") +
  ggtitle("Valence x Utterance") +
  ## width is a fixed setting, not a data mapping, so it sits outside aes()
  geom_errorbar(aes(ymin = prob - cil, ymax = prob + cih), width = .1)
# order effect?
## Same summary as the valence x utterance analysis, additionally split by
## answer.order.
mss <- d %>%
  group_by(valence, utterance, pplNum, answer.order, subid) %>%
  summarize(
    prob = mean(prob, na.rm = TRUE)
  )

## Cell means plus bootstrapped CI half-widths (cih above, cil below the mean).
## The three aggregate() calls use the same formula and data, so their rows come
## back in the same order and the columns can be attached directly.
ms <- aggregate(prob ~ valence + utterance + pplNum + answer.order, mss, mean)
ms$cih <- aggregate(prob ~ valence + utterance + pplNum + answer.order, mss, ci.high)$prob
ms$cil <- aggregate(prob ~ valence + utterance + pplNum + answer.order, mss, ci.low)$prob

## Grid of panels: answer.order in rows, utterance in columns.
## Missing means are dropped with is.na() rather than by comparing the numeric
## column with the string "NA"; ggplot() replaces the deprecated qplot().
ggplot(subset(ms, !is.na(prob)),
       aes(x = pplNum, y = prob, colour = valence)) +
  geom_line() +
  facet_grid(answer.order~utterance) +
  xlab("number of people who liked/didn't like") +
  ylab("likelihood that X people liked/didn't like") +
  ggtitle("Valence x Utterance") +
  ## width is a fixed setting, not a data mapping, so it sits outside aes()
  geom_errorbar(aes(ymin = prob - cil, ymax = prob + cih), width = .1)
## Mixed-effects regression of prob on valence, utterance and their
## interaction, with a by-subid random intercept and random valence slope.
## NOTE(review): the fitted model is stored under the name `lmer`, the same
## name as the fitting function; the call still resolves to the function, but a
## distinct name would read more clearly.
## NOTE(review): pplNum is not part of the formula although `d` has one row per
## pplNum value; confirm this is intended.
lmer <- lmer(prob ~ valence * utterance + (valence | subid), data=d)
## Print the fitted model (fixed effects, random-effect variances).
summary(lmer)
## Linear mixed model fit by REML ['lmerMod']
## Formula: prob ~ valence * utterance + (valence | subid)
## Data: d
##
## REML criterion at convergence: 528.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6009 -0.7292 0.0535 0.6836 2.7449
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subid (Intercept) 0.00992 0.0996
## valencelike 0.00799 0.0894 -0.37
## Residual 0.06320 0.2514
## Number of obs: 2868, groups: subid, 180
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.50535 0.01583 31.9
## valencelike -0.00692 0.01898 -0.4
## utterancepartialUtterance -0.00260 0.02253 -0.1
## utterancewholeUtterance -0.03885 0.02156 -1.8
## valencelike:utterancepartialUtterance 0.05486 0.02768 2.0
## valencelike:utterancewholeUtterance 0.02417 0.02646 0.9
##
## Correlation of Fixed Effects:
## (Intr) vlnclk uttrncpU uttrncwU vlnclk:ttrncpU
## valencelike -0.573
## uttrncprtlU -0.646 0.392
## uttrncwhlUt -0.676 0.410 0.464
## vlnclk:ttrncpU 0.383 -0.662 -0.592 -0.279
## vlnclk:ttrncwU 0.401 -0.693 -0.279 -0.591 0.470