3 predicates, 3 stars
rm(list = ls())
library(jsonlite)
library(tidyverse)
library(binom)
library(bootstrap)
library(langcog)
library(ggthemes)
library(forcats)
library(lme4)
raw.data.path <- "/Users/ericang/Documents/Research/polgrice_GIT/experiment/exp_versions/25_L2_J_wNeg/anonymized-results/"
## LOOP TO READ IN FILES
all.data <- data.frame()
files <- dir(raw.data.path,pattern="*.json")
for (file.name in files) {
## these are the two functions that are most meaningful
json_file <- readLines(paste(raw.data.path,file.name,sep=""))
json_file_str = paste(json_file, collapse = "")
json_file_str = gsub(",}", "}", json_file_str)
jso = jsonlite::fromJSON(json_file_str)
jso$answers$data$people <- NULL
jso1 <- data.frame(jso)
jso1$subid <- substring(file.name, 1, 7)
## now here's where data get bound together
all.data <- rbind(all.data, jso1)
}
Filter out participants and clean up.
ms_corr1 <- all.data %>%
filter(answers.data.utterance == "practice1",answers.data.judgment == "1")
ms_corr2 <- all.data %>%
filter(answers.data.utterance == "practice2",answers.data.judgment == "0")
ms_corr3 <- all.data %>%
filter(answers.data.utterance == "practice3",answers.data.judgment == "1")
## making json
d0 <- all.data %>%
filter(subid %in% ms_corr1$subid,
subid %in% ms_corr2$subid,
subid %in% ms_corr3$subid) %>%
filter(answers.data.utterance != "practice1",
answers.data.utterance != "practice2",
answers.data.utterance != "practice3") %>%
select(subid, answers.data.order, answers.data.domain, answers.data.state, answers.data.utterance, answers.data.judgment) %>%
mutate(subid = as.factor(subid)) %>%
mutate(subid = factor(subid, labels = c(1:length(levels(subid))))) %>%
mutate(trial = as.numeric(answers.data.order)) %>%
mutate(item = as.factor(answers.data.domain)) %>%
mutate(state = as.numeric(substr(answers.data.state, 6, 6))) %>%
mutate(utterance = as.factor(answers.data.utterance)) %>%
mutate(judgment = as.numeric(as.character(answers.data.judgment))) %>%
select(subid, trial, item, state, utterance, judgment) %>%
group_by(state, utterance, subid) %>%
summarize(
judgment = mean(judgment, na.rm=TRUE)
) %>%
group_by(state, utterance) %>%
summarize(posterior_b1 = 1+sum(judgment),
posterior_b2 = 1+n()-posterior_b1)
d1 <- all.data %>%
filter(subid %in% ms_corr1$subid,
subid %in% ms_corr2$subid,
subid %in% ms_corr3$subid) %>%
filter(answers.data.utterance != "practice1",
answers.data.utterance != "practice2",
answers.data.utterance != "practice3") %>%
select(subid, answers.data.order, answers.data.domain, answers.data.state, answers.data.utterance, answers.data.judgment) %>%
mutate(subid = as.factor(subid)) %>%
mutate(subid = factor(subid, labels = c(1:length(levels(subid))))) %>%
mutate(trial = as.numeric(answers.data.order)) %>%
mutate(item = as.factor(answers.data.domain)) %>%
mutate(state = as.numeric(substr(answers.data.state, 6, 6))) %>%
mutate(utterance = as.factor(answers.data.utterance)) %>%
mutate(judgment = as.numeric(as.character(answers.data.judgment))) %>%
select(subid, trial, item, state, utterance, judgment) %>%
group_by(state, utterance, subid) %>%
summarize(
judgment = mean(judgment, na.rm=TRUE)
) %>%
group_by(state, utterance) %>%
multi_boot_standard(column = "judgment") %>%
mutate(MAP = mean,
cred_low = ci_lower,
cred_upper = ci_upper) %>%
select(-mean, -ci_lower, -ci_upper)
## Joining, by = c("state", "utterance")
d <- left_join(d0, d1)
## Joining, by = c("state", "utterance")
# write(toJSON(d, pretty=TRUE), "/Users/ericang/Documents/Research/polgrice_GIT/experiment/data_analysis/data/literal_semantics_3heart.json")
d <- all.data %>%
filter(subid %in% ms_corr1$subid,
subid %in% ms_corr2$subid,
subid %in% ms_corr3$subid) %>%
filter(answers.data.utterance != "practice1",
answers.data.utterance != "practice2",
answers.data.utterance != "practice3") %>%
select(subid, answers.data.order, answers.data.domain, answers.data.state, answers.data.utterance, answers.data.judgment) %>%
mutate(subid = as.factor(subid)) %>%
mutate(trial = as.numeric(answers.data.order)) %>%
mutate(item = as.factor(answers.data.domain)) %>%
mutate(state = as.factor(substr(answers.data.state, 6, 6))) %>%
mutate(utterance = as.factor(answers.data.utterance)) %>%
mutate(judgment = as.numeric(as.character(answers.data.judgment))) %>%
mutate(positivity = factor(as.numeric(grepl("yes", utterance)),
levels = c(0, 1),
labels = c("negative","positive"))) %>%
mutate(utterance = as.factor(substring(utterance, 5))) %>%
select(subid, trial, item, state, positivity, utterance, judgment)
d$utterance <- ordered(d$utterance, levels = c("terrible", "bad", "good", "amazing"))
# d$utterance <- ordered(d$utterance, levels = c("terrible", "bad", "okay", "good", "amazing"))
d$positivity <- relevel(d$positivity, ref="positive")
levels(d$positivity) <- c("it was ___", "it wasn't ___")
levels(d$subid)
## [1] "anon-10" "anon-11" "anon-12" "anon-13" "anon-14" "anon-15" "anon-16"
## [8] "anon-17" "anon-19" "anon-2." "anon-20" "anon-21" "anon-22" "anon-24"
## [15] "anon-25" "anon-26" "anon-27" "anon-28" "anon-29" "anon-3." "anon-30"
## [22] "anon-31" "anon-32" "anon-33" "anon-35" "anon-36" "anon-37" "anon-38"
## [29] "anon-4." "anon-42" "anon-43" "anon-44" "anon-45" "anon-47" "anon-48"
## [36] "anon-5." "anon-6." "anon-7." "anon-8." "anon-9."
# goal_prob ~ true_state + utterance + goal
ms <- d %>%
group_by(positivity, state, utterance, subid) %>%
summarize(
judgment = mean(judgment, na.rm=TRUE)
) %>%
group_by(positivity, state, utterance) %>%
multi_boot_standard(column = "judgment") %>%
mutate(judgment = mean) %>%
mutate(data = "4pred")
## Joining, by = c("positivity", "state", "utterance")
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
raw.data.path <- "/Users/ericang/Documents/Research/polgrice_GIT/experiment/exp_versions/24_S_production_3star/anonymized-results/"
## LOOP TO READ IN FILES
all.data <- data.frame()
files <- dir(raw.data.path,pattern="*.json")
for (file.name in files) {
## these are the two functions that are most meaningful
json_file <- readLines(paste(raw.data.path,file.name,sep=""))
json_file_str = paste(json_file, collapse = "")
json_file_str = gsub(",}", "}", json_file_str)
jso = jsonlite::fromJSON(json_file_str)
jso$answers$data$people <- NULL
jso$answers$data$order <- jso$answers$data$order[1:12]
jso$answers$data$utterance <- NULL
jso$answers$data$state <- jso$answers$data$state[4:15]
jso$answers$data$domain <- jso$answers$data$domain[4:15]
jso$corr <- ifelse(jso$answers$data$judgment[1] == "1" & jso$answers$data$judgment[2] == "0" & jso$answers$data$judgment[3] == "1", 1, 0)
jso$answers$data$judgment <- NULL
jso1 <- data.frame(jso)
jso1$subid <- file.name
## now here's where data get bound together
all.data <- rbind(all.data, jso1)
}
Filter out participants and clean up.
d0 <- all.data %>%
filter(corr == 1) %>%
select(subid, answers.data.cond, answers.data.order, answers.data.domain, answers.data.goal, answers.data.state, answers.data.posneg, answers.data.keyword)
d1 <- d0 %>%
filter(answers.data.cond == 1) %>%
mutate(positivity = factor(answers.data.posneg, labels = c("no_neg", "neg")),
# utterance = factor(answers.data.keyword, labels = c("terrible", "bad", "good", "amazing")))
utterance = recode(answers.data.keyword,
keyword0 = "terrible",
keyword1 = "bad",
keyword3 = "good",
keyword4 = "amazing"
))
d2 <- d0 %>%
filter(answers.data.cond == 2) %>%
filter(answers.data.posneg != "" & answers.data.keyword != "") %>%
mutate(positivity = factor(answers.data.posneg, labels = c("no_neg", "neg")),
utterance = factor(answers.data.keyword, labels = c("amazing", "good", "bad", "terrible")))
d3 <- d0 %>%
filter(answers.data.cond == 3) %>%
mutate(positivity = factor(answers.data.posneg, labels = c("neg", "no_neg")),
utterance = factor(answers.data.keyword, labels = c("terrible", "bad", "good", "amazing")))
d4 <- d0 %>%
filter(answers.data.cond == 4) %>%
mutate(positivity = factor(answers.data.posneg, labels = c("neg", "no_neg")),
utterance = factor(answers.data.keyword, labels = c("amazing", "good", "bad", "terrible")))
d <- rbind(d1, d2, d3, d4) %>%
mutate(goal = answers.data.goal) %>%
mutate(trial = answers.data.order) %>%
mutate(item = answers.data.domain) %>%
mutate(true_state = answers.data.state) %>%
select(subid, trial, goal, true_state, positivity, utterance)
d$subid <- as.factor(d$subid)
d$trial <- as.numeric(d$trial)
d$positivity <- as.factor(d$positivity)
d$true_state <- as.factor(d$true_state)
d$utterance <- as.factor(d$utterance)
d$goal <- factor(d$goal, levels =c("informative", "social", "both"))
d$utterance <- ordered(d$utterance, levels = c("terrible", "bad", "good", "amazing"))
ms <- d %>%
select(subid, goal, true_state, positivity, utterance) %>%
mutate(positivity = factor(positivity, labels = c("yes", "not"))) %>%
mutate(utterance = paste(positivity, utterance, sep="_")) %>%
mutate(true_state = substr(true_state, 6, 6)) %>%
mutate(subid = factor(subid, labels = c(1:length(levels(subid))))) %>%
select(subid, goal, true_state, utterance)
# write.csv(ms, "/Users/ericang/Documents/Research/polgrice_GIT/experiment/data_analysis/data/speaker_3heart_registered.csv", # CHANGE FILE NAME AS NEEDED
# row.names=FALSE)
# write(toJSON(ms, pretty=TRUE), "/Users/ericang/Documents/Research/polgrice_GIT/experiment/data_analysis/data/speaker_3heart_registered.json")
# d <- ms %>%
# separate(utterance, sep="_")
ms2 <- d %>%
filter(!is.na(positivity), !is.na(utterance)) %>% # why is there NA?
group_by(true_state, goal) %>%
summarise(n.total=n())
ms3 <- d %>%
filter(!is.na(positivity), !is.na(utterance)) %>% # why is there NA?
group_by(true_state, goal, positivity, utterance) %>%
summarize(n = n())
ms <- left_join(ms2, ms3) %>%
group_by(true_state, goal, positivity, utterance) %>%
summarize(mean = n / n.total,
ci_lower = binom.bayes(n, n.total)$lower,
ci_upper = binom.bayes(n, n.total)$upper)
## Joining, by = c("true_state", "goal")
ms_fake <- cbind(expand.grid(true_state=levels(ms$true_state),goal=levels(ms$goal),positivity=levels(ms$positivity), utterance=levels(ms$utterance)), mean=NA, ci_lower=NA, ci_upper=NA)
ms.all <- rbind(data.frame(ms), data.frame(ms_fake))
levels(ms.all$true_state) <- c("0 heart", "1 heart", "2 hearts", "3 hearts")
levels(ms.all$goal) <- c("want to be informative", "want to make listener feel good", "want both")
levels(ms.all$goal) <- c("Data:informative", "Data:social", "Data:both")
levels(ms.all$positivity) <- c("yes", "not")
ggplot(data=filter(ms.all, !is.na(mean)), aes(x=utterance, y=mean, color = positivity, group = positivity)) +
geom_line()+
facet_grid(goal~true_state) +
xlab("no negation (It was ~) vs negation (It wasn't ~) ") +
ylab("proportion chosen") +
geom_linerange(aes(ymin=ci_lower,ymax=ci_upper), position=position_dodge(width = 0.1)) +
geom_hline(yintercept=.125, lty=2) +
ylim(0,1)+
scale_color_solarized()+ # can't choose starting color with accent="blue"?
ggthemes::theme_few()+
theme(axis.text.x = element_text(angle = 90)) +
ggtitle("Speaker production - Data only")
ms.all.4pred <- ms.all
ms2 <- d %>%
filter(!is.na(positivity), !is.na(utterance)) %>% # why is there NA?
group_by(true_state, goal) %>%
summarise(n.total=n())
ms3 <- d %>%
filter(!is.na(positivity), !is.na(utterance)) %>% # why is there NA?
group_by(true_state, goal, positivity) %>%
summarize(n = n())
ms <- left_join(ms2, ms3) %>%
group_by(true_state, goal, positivity) %>%
summarize(mean = n / n.total,
ci_lower = binom.bayes(n, n.total)$lower,
ci_upper = binom.bayes(n, n.total)$upper)
## Joining, by = c("true_state", "goal")
ms_fake <- cbind(expand.grid(true_state=levels(ms$true_state),goal=levels(ms$goal),positivity=levels(ms$positivity)), mean=NA, ci_lower=NA, ci_upper=NA)
ms.all <- rbind(data.frame(ms), data.frame(ms_fake))
levels(ms.all$true_state) <- c("0 heart", "1 heart", "2 hearts", "3 hearts")
# levels(ms.all$goal) <- c("want to be informative", "want to make listener feel good", "want both")
# levels(ms.all$goal) <- c("Data:informative", "Data:social", "Data:both")
levels(ms.all$positivity) <- c("yes", "not")
ms.all %>%
filter(!is.na(mean)) %>%
filter(positivity == "not") %>%
ggplot(.,
# filter(positivity == "not"),
aes(x=true_state, y=mean,
# linetype = positivity,
color=goal,
group=goal)) +
# group=interaction(goal, positivity))) +
geom_line(stat="identity", position=position_dodge()) +
xlab("true state") +
ylab("proportion negation") +
geom_linerange(aes(ymin=ci_lower,ymax=ci_upper), position=position_dodge(width=.15)) +
# scale_colour_discrete(guide = guide_legend(title = "utterance type")) +
theme_few()+
scale_color_solarized() +
ggtitle("Speaker production: Compare neg (\"not\") vs. no neg (\"yes\")")
## Warning: Width not defined. Set with `position_dodge(width = ?)`
# lmer to look at neg ~ state * goal
ms_glmer <- d %>%
mutate(positivity = factor(positivity, labels = c(0,1))) %>%
mutate(positivity = as.numeric(as.character(positivity))) %>%
mutate(true_state = as.numeric(substr(true_state, 6, 6)))
# mutate(goal = factor(goal, levels = c("both", "informative", "social")))
# filter(true_state < 3)
summary(glmer(data=ms_glmer, positivity ~ true_state * goal * (1|subid), family=binomial))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: positivity ~ true_state * goal * (1 | subid)
## Data: ms_glmer
##
## AIC BIC logLik deviance df.resid
## 1728.7 1767.9 -857.4 1714.7 1985
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1067 -0.4579 -0.2704 0.3459 3.8653
##
## Random effects:
## Groups Name Variance Std.Dev.
## subid (Intercept) 0.1903 0.4362
## Number of obs: 1992, groups: subid, 166
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.35951 0.14661 -2.452 0.0142 *
## true_state -0.71666 0.09689 -7.397 1.39e-13 ***
## goalsocial 0.55164 0.20454 2.697 0.0070 **
## goalboth 2.38623 0.24191 9.864 < 2e-16 ***
## true_state:goalsocial -0.66317 0.16206 -4.092 4.27e-05 ***
## true_state:goalboth -1.19809 0.16841 -7.114 1.13e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) tr_stt golscl golbth tr_stt:gls
## true_state -0.689
## goalsocial -0.679 0.493
## goalboth -0.582 0.403 0.419
## tr_stt:glsc 0.420 -0.584 -0.684 -0.262
## tr_stt:glbt 0.407 -0.556 -0.293 -0.786 0.346