# trupol_ana_preprocess
rm(list=ls())
library(ggplot2)
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
source("/Users/ericang/Documents/Research/trupol/data/version 1/helper/useful.R")
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following object is masked from 'package:tidyr':
##
## expand
# Read the coding sheet from the hard-coded local path.
d <- read.csv(
  file = "/Users/ericang/Documents/Research/trupol/data/version 2/india/trupol_coding.csv"
)
# log <- read.csv("/Users/ericang/Documents/Research/Politeness/trupol_git/data_analysis/info/trupol_subj.csv")
#
# # join with subj log
# d <- join(d, log)
# Keep the four identifying columns followed by the sixteen response columns.
# The column order matters: the reshape step further down relies on the
# response columns sitting in positions 5-20.
d <- select(
  d,
  subid, site, age, cond,
  trial1_2_eval, trial1_2_play, trial3_4_play, trial3_4_eval,
  trial1_nice, trial2_nice, trial3_nice, trial4_nice,
  trial1_mean, trial2_mean, trial3_mean, trial4_mean,
  trial1_truth, trial2_truth, trial3_truth, trial4_truth
)
# # categorize age
# d$Age <- as.numeric(as.character(d$Age))
# d <- cbind(d, age_cat = cut(d$Age, breaks=c(3, 5, 7, 9)))
# levels(d$age_cat) <- c("3-4", "5-6", "7-8")
# summary(d$age_cat)
# Wide -> long: stack the sixteen response columns (positions 5-20 after the
# column selection above) into a question-name column `q` and a response
# column `answer`; subid, site, age and cond are repeated on every row.
d <- gather(d, key = "q", value = "answer", trial1_2_eval:trial4_truth)
## Warning: attributes are not identical across measure variables; they will
## be dropped
# Derive two descriptors from each question name in `q`:
#   polite - looked up from the first 8 characters of the name
#   q_kind - looked up from characters 8-10 of the name
# Each lookup vector is written as key = label. Several keys share a label,
# so the resulting factors have fewer distinct labels than keys. Note that
# "NA" here is the literal string, not a missing value.
polite_key <- c(
  trial1_2 = "NA", trial3_4 = "NA",
  trial1_n = "honest", trial2_n = "polite",
  trial3_n = "polite", trial4_n = "honest",
  trial1_m = "honest", trial2_m = "polite",
  trial3_m = "polite", trial4_m = "honest",
  trial1_t = "honest", trial2_t = "polite",
  trial3_t = "polite", trial4_t = "honest"
)
kind_key <- c(
  "2_e" = "eval", "2_p" = "play", "4_p" = "play", "4_e" = "eval",
  "nic" = "niceness", "mea" = "meanness", "tru" = "truth-telling"
)
d1 <- mutate(
  d,
  polite = factor(
    substring(q, 1, 8),
    levels = names(polite_key),
    labels = unname(polite_key)
  ),
  q_kind = factor(
    substring(q, 8, 10),
    levels = names(kind_key),
    labels = unname(kind_key)
  )
)
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated
# Rebuild the grouping columns as factors from their character values, which
# also re-derives their level sets from the labels actually present.
factor_cols <- c("age", "polite", "q_kind")
d1[factor_cols] <- lapply(
  d1[factor_cols],
  function(column) as.factor(as.character(column))
)

# Recode the raw answers to 0/1. as.factor() sorts the distinct answer strings,
# and the six replacement labels below are matched to those sorted levels by
# POSITION.
# NOTE(review): this assumes the data contain exactly the six answer strings
# the mapping was written for - confirm against the coding sheet.
answer_codes <- as.factor(as.character(d1$answer))
levels(answer_codes) <- c("1", "1", "0", "0", "0", "1") # honest coded as '1'
d1$answer <- as.numeric(as.character(answer_codes))
play: “Who do you want to play with?”; eval: “Whose snack do you think is tastier (given the same utterance)?”
# eval / play questions: summary table for plotting.
# Step 1: one mean answer per child within each age x cond x q_kind cell.
mss <- d1 %>%
  filter(q_kind %in% c("play", "eval")) %>%
  group_by(age, cond, q_kind, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE))

# Step 2: collapse over children. The same formula is aggregated three times
# (mean, ci.high, ci.low), so the rows of the three results line up.
# ci.high / ci.low are not defined in this file - presumably they come from
# the sourced helper script.
cell_formula <- answer ~ age + cond + q_kind
ms <- aggregate(cell_formula, mss, mean)
ms$cih <- aggregate(cell_formula, mss, ci.high)$answer
ms$cil <- aggregate(cell_formula, mss, ci.low)$answer
x-axis: question kind (eval vs. play), with one panel column per age group and one panel row per condition; y-axis: mean answer (0 = incorrect, 1 = correct).
# Give the condition levels readable names for the facet strips.
# NOTE(review): this is a positional relabel - it assumes `cond` is a factor
# whose first level is the control condition; confirm.
levels(ms$cond) <- c("control", "experimental")

# One bar per question kind, one panel per condition (rows) x age (columns).
# Error bars extend `cil` below and `cih` above each mean.
p <- ggplot(
  data = ms,
  mapping = aes(x = q_kind, y = answer, fill = q_kind)
)
p +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = answer - cil, ymax = answer + cih, width = .1)
  ) +
  facet_grid(cond ~ age) +
  ggtitle("Proportion saying 'honest speaker' on eval & play questions")
Notice how the pattern is OPPOSITE to what we saw in Korea: children prefer to play with the honest speaker even in the experimental condition, in which the honest speaker is being impolite. On the other hand, for the eval question (which is different from the one we had asked before; this eval question is: “If Eshan baked another cookie, who would he ask?”), more children tended to answer that the protagonist would ask the polite speaker.
# niceness / meanness / truth-telling questions: summary table for plotting.
# Step 1: one mean answer per child within each
# age x cond x polite x q_kind cell.
mss <- d1 %>%
  filter(q_kind %in% c("niceness", "meanness", "truth-telling")) %>%
  group_by(age, cond, polite, q_kind, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE))

# Step 2: collapse over children. The same formula is aggregated three times
# (mean, ci.high, ci.low), so the rows of the three results line up.
# ci.high / ci.low are not defined in this file - presumably they come from
# the sourced helper script.
cell_formula <- answer ~ age + cond + polite + q_kind
ms <- aggregate(cell_formula, mss, mean)
ms$cih <- aggregate(cell_formula, mss, ci.high)$answer
ms$cil <- aggregate(cell_formula, mss, ci.low)$answer
# Give the condition levels readable names for the x axis.
# NOTE(review): this is a positional relabel - it assumes `cond` is a factor
# whose first level is the control condition; confirm.
levels(ms$cond) <- c("control", "experimental")

# Drop cells without a mean before plotting. Missingness is tested with
# is.na(); the previous comparison against the string "NA" coerced the numeric
# column to character and only removed missing rows because the comparison
# itself evaluated to NA.
p <- ggplot(
  subset(ms, !is.na(answer)),
  aes(x = cond, y = answer, fill = polite)
)

# Bars are dodged by speaker type within each condition; panels are
# question kind (rows) x age (columns). Error bars extend `cil` below and
# `cih` above each mean and use the same 0.9 dodge as the bars. The bar-cap
# width is a fixed setting, so it is passed as a parameter rather than mapped
# inside aes().
p +
  geom_bar(position = position_dodge(), stat = "identity") +
  facet_grid(q_kind ~ age) +
  geom_errorbar(
    aes(ymin = answer - cil, ymax = answer + cih),
    position = position_dodge(.9),
    width = .1
  ) +
  ggtitle("Proportion \"yes\"")
Both age groups correctly said that the honest speaker was telling the truth, whereas the dishonest speaker was not.
8-year-olds said the dishonest speaker was less mean and nicer when he/she was being polite than when he/she was lying for no reason, whereas 6-year-olds did not differentiate between the two situations.
For both age groups, but interestingly especially for 6-year-olds, there was a numerical difference suggesting that the honest speaker was considered meaner and less nice when he/she was being impolite.