# trupol_ana_preprocess
rm(list=ls())
library(ggplot2)
library(plyr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.1.2
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.1.2
source("/Users/ericang/Documents/Research/Politeness/trupol_git/data_analysis/helper/useful.R")
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## Loading required package: Rcpp
## Warning: package 'reshape2' was built under R version 3.1.2
d <- read.csv("/Users/ericang/Documents/Research/Politeness/trupol_git/data_analysis/data/trupol_data.csv")
log <- read.csv("/Users/ericang/Documents/Research/Politeness/trupol_git/data_analysis/info/trupol_subj.csv")
# join with subj log
d <- join(d, log)
## Joining by: subid
# select key vars
d <- d %>%
select(subid, Age, site, trial1_2_evalCorrect, trial1_2_playCorrect, trial3_4_playCorrect, trial3_4_evalCorrect, trial1_niceness, trial2_niceness, trial3_niceness, trial4_niceness, trial1_Lfeel_val, trial2_Lfeel_val, trial3_Lfeel_val, trial4_Lfeel_val)
# categorize age
d$Age <- as.numeric(as.character(d$Age))
d <- cbind(d, age_cat = cut(d$Age, breaks=c(3, 5, 7, 9)))
levels(d$age_cat) <- c("3-4", "5-6", "7-8")
summary(d$age_cat)
## 3-4 5-6 7-8
## 23 38 19
# reshape data
d <- d %>%
gather("q", "answer", 4:15)
## Warning: attributes are not identical across measure variables; they will
## be dropped
# add columns to categorize vars
d1 <- d %>%
mutate(polite = factor(substring(q, 1, 8),
levels = c("trial1_2", "trial3_4",
"trial1_n", "trial2_n", "trial3_n", "trial4_n",
"trial1_L", "trial2_L", "trial3_L", "trial4_L"),
labels = c("NA", "NA",
"impolite", "polite", "polite", "impolite",
"impolite", "polite", "polite", "impolite")),
q_kind = factor(substring(q, 8, 10),
levels = c("2_e", "2_p", "4_p", "4_e",
"nic", "Lfe"),
labels = c("eval", "play", "play", "eval",
"niceness", "Lfeel")))
## Warning: duplicated levels in factors are deprecated
## Warning: duplicated levels in factors are deprecated
d1$polite <- as.factor(as.character(d1$polite))
d1$q_kind <- as.factor(as.character(d1$q_kind))
d1$answer <- as.factor(as.character(d1$answer))
levels(d1$answer) <- c("NA", "NA", "0", "NA", "1", "2", "3", "4", "5", "0", "1")
d1$answer <- as.numeric(as.character(d1$answer))
## Warning: NAs introduced by coercion
play: “who do you want to play with?” eval: “whose snack do you think is tastier (given the same utterance)?”
# plot: eval and play
mss <- d1 %>%
filter(q_kind == "play" | q_kind == "eval") %>%
group_by(q_kind, age_cat, site, subid) %>%
summarize(
answer = mean(answer, na.rm=TRUE)
)
ms <- aggregate(answer ~ q_kind + age_cat + site, mss, mean)
ms$cih <- aggregate(answer ~ q_kind + age_cat + site, mss, ci.high)$answer
ms$cil <- aggregate(answer ~ q_kind + age_cat + site, mss, ci.low)$answer
x-axis: age groups y-axis: answer; 0 = incorrect, 1 = correct
qplot(age_cat, answer,
fill = age_cat,
geom="bar", position = "dodge", stat="identity",
data=subset(ms, answer!="NA")) +
facet_wrap(site~q_kind) +
geom_errorbar(aes(ymin=answer-cil,ymax=answer+cih,width=.1))
# histogram
qplot(age_cat, # 1 = correct
fill = as.factor(answer), position = "dodge",
geom="histogram",
data=subset(d1, answer!="NA" & (q_kind == "play" | q_kind == "eval"))) +
facet_grid(site~q_kind)
“How nice is he/she?”
# plot: niceness
mss <- d1 %>%
filter(q_kind == "niceness") %>%
group_by(polite, age_cat, site, subid) %>%
summarize(
answer = mean(answer, na.rm=TRUE)
)
ms <- aggregate(answer ~ polite + age_cat + site, mss, mean)
ms$cih <- aggregate(answer ~ polite + age_cat + site, mss, ci.high)$answer
ms$cil <- aggregate(answer ~ polite + age_cat + site, mss, ci.low)$answer
1 = really mean, 5 = really nice
qplot(polite, answer,
fill = polite,
geom="bar", position = "dodge", stat="identity",
data=subset(ms, answer!="NA")) +
facet_grid(site~age_cat) +
geom_errorbar(aes(ymin=answer-cil,ymax=answer+cih,width=.1))
The older group performs better: they correctly infer that the polite speaker is nicer
# histogram
qplot(polite,
fill = as.factor(answer),
geom="histogram", position = "dodge",
data=subset(d1, answer!="NA" & (q_kind == "niceness") & site != "NA")) +
facet_grid(site~age_cat)
“How did he/she feel (after hearing the speaker’s utterance)?”
# plot: listener feeling inference
mss <- d1 %>%
filter(q_kind == "Lfeel") %>%
group_by(polite, age_cat, site, subid) %>%
summarize(
answer = mean(answer, na.rm=TRUE)
)
# look at correctness of responses
mss[mss$polite == "impolite",]$answer <- (mss[mss$polite == "impolite",]$answer - 1) * (-1)
ms <- aggregate(answer ~ polite + age_cat + site, mss, mean)
ms$cih <- aggregate(answer ~ polite + age_cat + site, mss, ci.high)$answer
ms$cil <- aggregate(answer ~ polite + age_cat + site, mss, ci.low)$answer
0 = incorrect, 1 = correct
qplot(polite, answer,
fill = polite,
geom="bar", position = "dodge", stat="identity",
data=subset(ms, answer!="NA")) +
facet_grid(site~age_cat) +
geom_errorbar(aes(ymin=answer-cil,ymax=answer+cih,width=.1))
For both sites, 5-6 year-olds all correctly infer the listener’s feeling after hearing the impolite speaker’s utterance (“they felt bad”), whereas more errors occur for listener’s feeling upon hearing the polite speaker’s utterance.
Weird pattern for 3-4 year-olds: because of too small number? - probably, look at the histogram below
0 = incorrect, 1 = correct
# histogram
# temporary data d2
d2 <- d1
# look at correctness of responses
d2$answer <- as.numeric(as.character(d2$answer))
d2[d2$polite == "impolite",]$answer <- (d2[d2$polite == "impolite",]$answer - 1) * (-1)
qplot(polite,
fill = as.factor(answer),
geom="histogram", position = "dodge",
data=subset(d2, answer!="NA" & (q_kind == "Lfeel") & site != "NA")) +
facet_grid(site~age_cat)
Pattern looks the same for Korea and US
For participants who correctly inferred listener’s feeling upon hearing the utterance, did they infer the speaker’s niceness better too?
d <- read.csv("/Users/ericang/Documents/Research/Politeness/trupol_git/data_analysis/data/trupol_data.csv")
log <- read.csv("/Users/ericang/Documents/Research/Politeness/trupol_git/data_analysis/info/trupol_subj.csv")
# join with subj log
d <- join(d, log)
## Joining by: subid
# select key vars
d <- d %>%
select(subid, Age, site, trial1_niceness, trial2_niceness, trial3_niceness, trial4_niceness, trial1_Lfeel_val, trial2_Lfeel_val, trial3_Lfeel_val, trial4_Lfeel_val)
# categorize age
d$Age <- as.numeric(as.character(d$Age))
d <- cbind(d, age_cat = cut(d$Age, breaks=c(3, 4, 5, 6, 7)))
levels(d$age_cat) <- c("3", "4", "5", "6")
# reshape data
d <- d %>%
gather("q", "answer", 4:11)
## Warning: attributes are not identical across measure variables; they will
## be dropped
# add columns to categorize vars
d1 <- d %>%
mutate(trial = substring(q, 6, 6), q_kind = substring(q, 8, 16)) %>%
select(subid, site, age_cat, trial, q_kind, answer) %>%
spread(q_kind, answer) %>%
mutate(polite = factor(trial,
levels = c("1", "2", "3", "4"),
labels = c(
"impolite", "polite", "polite", "impolite")))
## Warning: duplicated levels in factors are deprecated
d1$Lfeel_val <- as.factor(as.numeric(as.character(d1$Lfeel_val)))
d1$niceness <- as.numeric(as.character(d1$niceness))
## Warning: NAs introduced by coercion
d1$polite <- as.factor(as.character(d1$polite))
d1$site <- as.factor(as.character(d1$site))
ggplot(subset(d1, Lfeel_val != "NA" & site != "NA"), aes(x=Lfeel_val, y=niceness)) +
geom_jitter(position = position_jitter(height = .1, width = .3), aes(colour = niceness)) +
facet_grid(site~.)
Overall, those who thought the listener felt bad had more mixed responses about speaker’s niceness, whereas those who thought the listener felt good more correctly inferred the speaker was nice.
# facet by politeness
ggplot(subset(d1, Lfeel_val != "NA" & site != "NA"), aes(x=Lfeel_val, y=niceness)) +
geom_jitter(position = position_jitter(height = .1, width = .3), aes(colour = niceness)) +
facet_grid(site~polite)
After an impolite speaker says something, participants correctly infer the listener’s feelings, but incorrectly infers that the impolite speaker is nice. This is true for both sites.