# trupol_ana_preprocess
# NOTE(review): removes every object from the calling environment (the global
# environment when the script is run top-level). It does not detach packages
# or reset options, so it is not a true "fresh session"; restarting R is the
# safer way to get a clean state.
rm(list=ls())
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(langcog)
##
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
##
## scale
source("/Users/ericang/Documents/Research/trupol_GIT/data/version1/helper/useful.R")
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
# Load the combined US/India kid + adult data set.
d1 <- read.csv("/Users/ericang/Documents/Research/trupol_GIT/data/version2/trupol_usindia_kidadult_160719.csv")

# Columns that are rebuilt as factors from their character representation.
# `order` is rebuilt from its OWN column: the previous code copied `age` into
# `order`, so every later summary by "order" was silently a summary by age.
factor_cols <- c("age", "order", "site", "polite", "q_kind", "cond", "answer")
absent_cols <- factor_cols[!factor_cols %in% names(d1)]
if (length(absent_cols) > 0) {
  stop(
    "Expected column(s) missing from the data file: ",
    paste(absent_cols, collapse = ", "),
    call. = FALSE
  )
}
for (col in factor_cols) {
  d1[[col]] <- as.factor(as.character(d1[[col]]))
}

# levels(d1$site) <- c("india", "india", "us", "us")

# Positional rename of the two condition levels.
levels(d1$cond) <- c("cont", "exp")

# Earlier binary coding, kept for reference:
# levels(d1$answer) <- c("1", "1", "0", "0", "0", "1") # honest coded as '1'
# d1$answer <- as.numeric(as.character(d1$answer))

# Positional recode of the raw answer levels: the i-th existing level is
# renamed to the i-th entry below, and entries that repeat merge their levels.
# The vector must match the number and order of levels in the current data
# file, so: check every time!
levels(d1$answer) <- c(
  "", 0, 0.5, 1, "character", 1, 1, "misc", 0, "listener_concern", 0, 0,
  "state_concern", "state_concern", "veracity_concern", 1
)

levels(d1$site) <- c("India", "US")

# Drop the "4" age group. filter() also discards rows where the comparison is
# NA, so rows with a missing age are removed here as well.
d1 <- d1 %>%
  filter(age != "4")
# nice/mean/truth-telling rating ----
### Experimental condition
# Speaker ratings (truth-telling / niceness / meanness) in the experimental
# condition: per-subject means first, then cell means per
# site x age x speaker x question. The `mean`, `ci_lower` and `ci_upper`
# columns used below presumably come from langcog's multi_boot_standard().
rating_kinds <- c("truth-telling", "niceness", "meanness")

ms <- d1 %>%
  filter(q_kind %in% rating_kinds) %>%
  filter(!is.na(age)) %>%
  filter(cond == "exp") %>%
  mutate(answer = as.numeric(as.character(answer))) %>%
  group_by(site, age, polite, q_kind, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE)) %>%
  group_by(site, age, polite, q_kind) %>%
  multi_boot_standard(column = "answer", na.rm = TRUE) %>%
  mutate(answer = mean)

# Positional relabelling of factor levels.
# NOTE(review): assumes the raw `polite` levels sort with the honest speaker
# first -- confirm against the data.
levels(ms$site) <- c("India", "US")
levels(ms$polite) <- c("honest", "polite (dishonest)")
ms$q_kind <- factor(ms$q_kind, levels = rating_kinds)
ms$site <- relevel(ms$site, ref = "US")

# Missing cell means are dropped with is.na() instead of comparing the numeric
# column against the string "NA".
p <- ggplot(
  subset(ms, !is.na(answer)),
  aes(x = age, y = answer, fill = polite)
)
p +
  geom_bar(position = position_dodge(), stat = "identity") +
  facet_grid(q_kind ~ site) +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    width = .1,
    position = position_dodge(.9)
  ) +
  ggtitle("Judgments for honest vs. polite speaker") +
  geom_hline(yintercept = .50, lty = 4) +
  ylab("Proportion \"yes\"") +
  scale_fill_discrete(guide = guide_legend(title = "speaker"))

# control condition
# Same speaker ratings as the experimental plot, restricted to the control
# condition (cond == "cont"): per-subject means first, then cell means per
# site x age x speaker x question. The `mean`, `ci_lower` and `ci_upper`
# columns used below presumably come from langcog's multi_boot_standard().
rating_kinds <- c("truth-telling", "niceness", "meanness")

ms <- d1 %>%
  filter(q_kind %in% rating_kinds) %>%
  filter(!is.na(age), age != "4") %>%
  filter(cond == "cont") %>%
  mutate(answer = as.numeric(as.character(answer))) %>%
  group_by(site, age, polite, q_kind, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE)) %>%
  group_by(site, age, polite, q_kind) %>%
  multi_boot_standard(column = "answer", na.rm = TRUE) %>%
  mutate(answer = mean)

# Positional relabelling of factor levels.
# NOTE(review): assumes the raw `polite` levels sort with the honest speaker
# first -- confirm against the data.
levels(ms$site) <- c("India", "US")
levels(ms$polite) <- c("honest", "dishonest")
ms$q_kind <- factor(ms$q_kind, levels = rating_kinds)
ms$site <- relevel(ms$site, ref = "US")

# Missing cell means are dropped with is.na() instead of comparing the numeric
# column against the string "NA".
p <- ggplot(
  subset(ms, !is.na(answer)),
  aes(x = age, y = answer, fill = polite)
)
p +
  geom_bar(position = position_dodge(), stat = "identity") +
  facet_grid(q_kind ~ site) +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    width = .1,
    position = position_dodge(.9)
  ) +
  ggtitle("Honest vs. dishonest (for no reason) speaker") +
  geom_hline(yintercept = .50, lty = 4) +
  ylab("Proportion \"yes\"") +
  scale_fill_discrete(guide = guide_legend(title = "speaker"))

# us only, all cond
# Speaker ratings for the US site only, with both conditions shown side by
# side: per-subject means first, then cell means per
# condition x age x speaker x question. The `mean`, `ci_lower` and `ci_upper`
# columns used below presumably come from langcog's multi_boot_standard().
rating_kinds <- c("truth-telling", "niceness", "meanness")

ms <- d1 %>%
  filter(q_kind %in% rating_kinds) %>%
  filter(!is.na(age)) %>%
  filter(site == "US") %>%
  mutate(answer = as.numeric(as.character(answer))) %>%
  group_by(cond, age, polite, q_kind, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE)) %>%
  group_by(cond, age, polite, q_kind) %>%
  multi_boot_standard(column = "answer", na.rm = TRUE) %>%
  mutate(answer = mean)

# Positional relabelling of factor levels.
# NOTE(review): assumes the raw `polite` levels sort with the honest speaker
# first -- confirm against the data.
levels(ms$polite) <- c("honest", "dishonest")

# Put "exp" first so the positional rename below maps
# exp -> "dishonest for politeness" and cont -> "dishonest for no reason".
ms$cond <- relevel(ms$cond, ref = "exp")
levels(ms$cond) <- c("dishonest for politeness", "dishonest for no reason")
ms$q_kind <- factor(ms$q_kind, levels = rating_kinds)

# Missing cell means are dropped with is.na() instead of comparing the numeric
# column against the string "NA".
p <- ggplot(
  subset(ms, !is.na(answer)),
  aes(x = age, y = answer, fill = polite)
)
p +
  geom_bar(position = position_dodge(), stat = "identity") +
  facet_grid(q_kind ~ cond) +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    width = .1,
    position = position_dodge(.9)
  ) +
  ggtitle("Judgments for honest vs. dishonest speaker (US)") +
  geom_hline(yintercept = .50, lty = 4) +
  ylab("Proportion \"yes\"") +
  scale_fill_discrete(guide = guide_legend(title = "speaker"))

# who do you want to play with? ----
# "play" question: per-subject means, then cell means per
# site x condition x age, drawn as dodged bars with error bars.
dodge <- position_dodge(width = 0.9)

ms <- d1 %>%
  filter(q_kind == "play") %>%
  filter(age != "4") %>%
  mutate(answer = as.numeric(as.character(answer))) %>%
  group_by(site, age, cond, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE)) %>%
  group_by(site, cond, age) %>%
  multi_boot_standard(column = "answer", na.rm = TRUE) %>%
  mutate(answer = mean) %>%
  # The comparison is NA for a missing age, and filter() drops those rows.
  filter(age != "NA")

# Positional relabelling of the factor levels; US becomes the first facet.
levels(ms$site) <- c("India", "US")
ms$site <- relevel(ms$site, ref = "US")
levels(ms$cond) <- c("control", "experimental")

p <- ggplot(ms, aes(x = age, y = answer, fill = cond))
p +
  geom_bar(stat = "identity", position = dodge) +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper, width = .1),
    position = dodge
  ) +
  geom_hline(yintercept = .50, lty = 4) +
  facet_grid(. ~ site) +
  xlab("age") +
  ylab("Proportion saying 'honest speaker'") +
  ggtitle("Preferred speaker to play with")

### experimental: jitter
# One point per participant in the experimental condition, showing which
# speaker they preferred to play with. Per-subject means that are missing or
# exactly 0.5 are excluded.
ms <- d1 %>%
  filter(q_kind == "play", cond == "exp") %>%
  mutate(answer = as.numeric(as.character(answer))) %>%
  group_by(site, age, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE)) %>%
  ungroup() %>%
  filter(
    !is.na(age),
    age != "4",
    !is.na(answer),
    answer != 0.5 # numeric comparison; previously compared to the string "0.5"
  ) %>%
  mutate(site = relevel(site, ref = "US"))

# Explicit levels keep the 0 -> "polite", 1 -> "honest" mapping fixed even if
# only one of the two responses is present after filtering. Without `levels`,
# factor() errors when the number of distinct values differs from the number
# of labels. `labels` is spelled out rather than relying on partial matching
# of `label`.
p <- ggplot(
  ms,
  aes(
    x = age,
    y = factor(answer, levels = c(0, 1), labels = c("polite", "honest")),
    color = as.factor(answer)
  )
)
p +
  geom_jitter(width = 0.6, height = 0.4) +
  facet_grid(. ~ site) +
  ggtitle("Preferred speaker to play with in experimental condition") +
  ylab("preferred speaker") +
  scale_color_discrete(guide = "none")

# listener feeling inference ----
# Listener-feeling question in the experimental condition: per-subject means
# first, then cell means per site x age x speaker. The `mean`, `ci_lower` and
# `ci_upper` columns used below presumably come from langcog's
# multi_boot_standard().
ms <- d1 %>%
  filter(q_kind == "listener_feeling", cond == "exp") %>%
  mutate(answer = as.numeric(as.character(answer))) %>%
  group_by(site, age, polite, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE)) %>%
  group_by(site, age, polite) %>%
  multi_boot_standard(column = "answer", na.rm = TRUE) %>%
  mutate(answer = mean)

# Positional relabelling of factor levels.
# NOTE(review): assumes the raw `polite` levels sort with the honest speaker
# first -- confirm against the data.
levels(ms$site) <- c("India", "US")
levels(ms$polite) <- c("honest", "polite (dishonest)")
ms$site <- relevel(ms$site, ref = "US")

# Missing cell means are dropped with is.na() instead of comparing the numeric
# column against the string "NA".
p <- ggplot(
  subset(ms, !is.na(answer)),
  aes(x = age, y = answer, fill = polite)
)
p +
  geom_bar(position = position_dodge(), stat = "identity") +
  facet_grid(. ~ site) +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    width = .1,
    position = position_dodge(.9)
  ) +
  ggtitle("Listener's feeling inference") +
  geom_hline(yintercept = .50, lty = 4) +
  ylab("Feeling bad (0) vs. good (1)") +
  scale_fill_discrete(guide = guide_legend(title = "speaker"))

# stacked percent bars
# Share of "good" vs. "bad" listener-feeling responses per
# site x age x speaker in the experimental condition. Missing responses and
# responses coded 0.5 are excluded.
ms <- d1 %>%
  filter(q_kind == "listener_feeling", cond == "exp") %>%
  mutate(answer = as.numeric(as.character(answer))) %>%
  filter(
    !is.na(age),
    !is.na(answer),
    answer != 0.5 # numeric comparison; previously compared to the string "0.5"
  ) %>%
  group_by(site, age, polite, answer) %>%
  summarize(count = n()) %>%
  # summarize() peels off the innermost grouping (answer), so sum(count) is
  # taken within each site x age x speaker cell.
  mutate(perc = count / sum(count)) %>%
  ungroup() %>%
  mutate(site = relevel(site, ref = "US"))

# 1 -> "good" (first level), 0 -> "bad". Explicit levels and labels keep the
# mapping fixed even when one of the two responses is absent, where relevel()
# followed by a positional rename would fail.
ms$answer <- factor(ms$answer, levels = c(1, 0), labels = c("good", "bad"))

p <- ggplot(
  subset(ms, !is.na(answer)),
  aes(x = age, y = perc, fill = answer)
)
p +
  geom_bar(stat = "identity", width = 0.7) +
  facet_grid(site ~ polite) +
  ggtitle("Listener's feeling inference") +
  geom_hline(yintercept = .50, lty = 4) +
  ylab("Proportion of responses") +
  scale_fill_discrete(guide = guide_legend(title = "listener felt"))

# why speaker said X ----
# Coded reasons for the speaker's utterance (experimental condition only),
# counted per speaker type and shown by age (rows) and site (columns).
ms <- d1 %>%
  filter(q_kind == "speaker_why", cond == "exp") %>%
  select(site, age, polite, subid, answer) %>%
  droplevels()

# Restrict and order the reason categories; any answer outside this list
# becomes NA here and is removed by the subset() below.
ms$answer <- factor(
  ms$answer,
  levels = c(
    "listener_concern", "character", "veracity_concern",
    "state_concern", "misc"
  )
)

# Positional relabelling of the remaining factor levels; US goes first.
levels(ms$polite) <- c("honest", "polite (dishonest)")
levels(ms$site) <- c("India", "US")
ms$site <- relevel(ms$site, ref = "US")

p <- ggplot(
  subset(ms, answer != "NA"),
  aes(x = polite, fill = answer)
)
p +
  geom_bar(position = "dodge") +
  facet_grid(age ~ site) +
  xlab("speaker") +
  scale_fill_discrete(guide = guide_legend(title = "reasons")) +
  ggtitle("why speaker said ~")

# Number of participants per site x condition x order cell.
# distinct(subid) on its own keeps only the `subid` column in dplyr >= 0.5,
# which makes the following group_by() fail. `.keep_all = TRUE` keeps the
# first full row for each participant instead.
subj <- d1 %>%
  distinct(subid, .keep_all = TRUE) %>%
  group_by(site, cond, order) %>%
  summarise(count = n()) %>%
  ungroup()
subj
## Source: local data frame [8 x 4]
## Groups: site, cond [?]
##
## site cond order count
## (fctr) (fctr) (fctr) (int)
## 1 India cont 6 10
## 2 India cont 8 11
## 3 India exp 6 12
## 4 India exp 8 12
## 5 US cont 6 26
## 6 US cont 8 21
## 7 US exp 6 26
## 8 US exp 8 19