# trupol_ana_preprocess
rm(list=ls())
library(ggplot2)
library(plyr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.1.2
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.1.2
source("/Users/ericang/Documents/Research/Politeness/trupol_git/data_analysis/helper/useful.R")
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## Loading required package: Rcpp
## Warning: package 'reshape2' was built under R version 3.1.2
# ---- Load responses, attach subject info, bin participants by age ----
d <- read.csv("/Users/ericang/Documents/Research/Politeness/trupol_git/data_analysis/data/trupol_data.csv")
log <- read.csv("/Users/ericang/Documents/Research/Politeness/trupol_git/data_analysis/info/trupol_subj.csv")
# join with subj log
# The key is spelled out so the join cannot silently pick up extra shared
# columns; the original run reported "Joining by: subid".
d <- join(d, log, by = "subid")
# select key vars
d <- d %>%
  select(
    subid, Age,
    trial1_2_evalCorrect, trial1_2_playCorrect,
    trial3_4_playCorrect, trial3_4_evalCorrect,
    trial1_niceness, trial2_niceness, trial3_niceness, trial4_niceness,
    trial1_Lfeel_val, trial2_Lfeel_val, trial3_Lfeel_val, trial4_Lfeel_val
  )
# categorize age
# Age may have been read as a factor/string; go through character so the
# printed value (not the internal factor code) becomes the number.
d$Age <- as.numeric(as.character(d$Age))
# Left-closed bins [3,4), [4,5), [5,6), [6,7): a child aged exactly 4.0 is
# labelled "4". The default right-closed bins would label that child "3" and
# turn an age of exactly 3.0 into NA.
d$age_cat <- cut(d$Age,
  breaks = c(3, 4, 5, 6, 7),
  right = FALSE,
  labels = c("3", "4", "5", "6")
)
summary(d$age_cat)
# Counts below were recorded with the earlier right-closed bins; re-run to
# refresh them.
## 3 4 5 6
## 5 15 11 2
# reshape data
# Columns 3:14 are the twelve response variables; stack them into (q, answer)
# pairs so there is one row per subject and question.
d <- gather(d, "q", "answer", 3:14)
## Warning: attributes are not identical across measure variables; they will
## be dropped
# add columns to categorize vars
d1 <- d
# Politeness condition, read from the first 8 characters of the question name.
# The eval/play questions ("trial1_2", "trial3_4") get the literal label "NA".
d1$polite <- factor(
  substring(d1$q, 1, 8),
  levels = c(
    "trial1_2", "trial3_4",
    "trial1_n", "trial2_n", "trial3_n", "trial4_n",
    "trial1_L", "trial2_L", "trial3_L", "trial4_L"
  ),
  labels = c(
    "NA", "NA",
    "impolite", "polite", "polite", "impolite",
    "impolite", "polite", "polite", "impolite"
  )
)
# Question type, read from characters 8-10 of the question name.
d1$q_kind <- factor(
  substring(d1$q, 8, 10),
  levels = c("2_e", "2_p", "4_p", "4_e", "nic", "Lfe"),
  labels = c("eval", "play", "play", "eval", "niceness", "Lfeel")
)
## Warning: duplicated levels in factors are deprecated
## Warning: duplicated levels in factors are deprecated
# Rebuild the three factors from their character values so each ends up with
# a clean, de-duplicated level set.
recode_cols <- c("polite", "q_kind", "answer")
d1[recode_cols] <- lapply(
  d1[recode_cols],
  function(v) as.factor(as.character(v))
)
# NOTE(review): this relabels the answer levels by POSITION in their sorted
# order: the first two become "NA", the next six "0".."5", the last two "0"
# and "1". The raw values are not visible here; confirm against the data
# before reusing this with a different file.
levels(d1$answer) <- c("NA", "NA", "0", "1", "2", "3", "4", "5", "0", "1")
# The "NA" label turns into a real NA here, hence the coercion warning.
d1$answer <- as.numeric(as.character(d1$answer))
## Warning: NAs introduced by coercion
# play: “Who do you want to play with?”; eval: “Whose snack do you think is tastier (given the same utterance)?”
# plot: eval and play
# Average within subject first so each child contributes one value per
# question type, then average across subjects within each age bin.
mss <- d1 %>%
  filter(q_kind %in% c("play", "eval")) %>%
  group_by(q_kind, age_cat, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE))
ms <- aggregate(answer ~ q_kind + age_cat, mss, mean)
# ci.high / ci.low come from the sourced helper file.
# NOTE(review): the error bars below treat them as distances above / below the
# mean; confirm in useful.R.
ms$cih <- aggregate(answer ~ q_kind + age_cat, mss, ci.high)$answer
ms$cil <- aggregate(answer ~ q_kind + age_cat, mss, ci.low)$answer
# Missing means are dropped with is.na() rather than a comparison against the
# string "NA"; width is a fixed setting, so it sits outside aes().
qplot(age_cat, answer,
  fill = age_cat,
  geom = "bar", position = "dodge", stat = "identity",
  data = subset(ms, !is.na(answer))
) +
  facet_wrap(~q_kind) +
  geom_errorbar(aes(ymin = answer - cil, ymax = answer + cih), width = .1)
# histogram
qplot(as.factor(answer), # 1 = correct
  fill = age_cat,
  geom = "histogram",
  data = subset(d1, !is.na(answer) & q_kind %in% c("play", "eval"))
) +
  facet_grid(age_cat ~ q_kind)
# niceness: “How nice is he/she?”
# plot: niceness
# Per-subject mean niceness rating in each politeness condition, then the
# across-subject mean within each age bin.
mss <- d1 %>%
  filter(q_kind == "niceness") %>%
  group_by(polite, age_cat, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE))
ms <- aggregate(answer ~ polite + age_cat, mss, mean)
# ci.high / ci.low come from the sourced helper file.
# NOTE(review): the error bars below treat them as distances above / below the
# mean; confirm in useful.R.
ms$cih <- aggregate(answer ~ polite + age_cat, mss, ci.high)$answer
ms$cil <- aggregate(answer ~ polite + age_cat, mss, ci.low)$answer
# Missing means are dropped with is.na() rather than a comparison against the
# string "NA"; width is a fixed setting, so it sits outside aes().
qplot(polite, answer,
  fill = polite,
  geom = "bar", position = "dodge", stat = "identity",
  data = subset(ms, !is.na(answer))
) +
  facet_wrap(~age_cat) +
  geom_errorbar(aes(ymin = answer - cil, ymax = answer + cih), width = .1)
# histogram
qplot(as.factor(answer),
  fill = polite,
  geom = "histogram",
  data = subset(d1, !is.na(answer) & q_kind == "niceness")
) +
  facet_grid(polite ~ age_cat)
# Lfeel: “How did he/she feel (after hearing the speaker’s utterance)?”
# plot: listener feeling inference
# Per-subject mean response in each politeness condition.
mss <- d1 %>%
  filter(q_kind == "Lfeel") %>%
  group_by(polite, age_cat, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE))
# look at correctness of responses
# On impolite trials the score is reversed (x -> 1 - x). Reversing the
# subject mean equals averaging the reversed responses.
mss$answer <- ifelse(mss$polite == "impolite", 1 - mss$answer, mss$answer)
ms <- aggregate(answer ~ polite + age_cat, mss, mean)
# ci.high / ci.low come from the sourced helper file.
# NOTE(review): the error bars below treat them as distances above / below the
# mean; confirm in useful.R.
ms$cih <- aggregate(answer ~ polite + age_cat, mss, ci.high)$answer
ms$cil <- aggregate(answer ~ polite + age_cat, mss, ci.low)$answer
# Missing means are dropped with is.na() rather than a comparison against the
# string "NA"; width is a fixed setting, so it sits outside aes().
qplot(polite, answer,
  fill = polite,
  geom = "bar", position = "dodge", stat = "identity",
  data = subset(ms, !is.na(answer))
) +
  facet_wrap(~age_cat) +
  geom_errorbar(aes(ymin = answer - cil, ymax = answer + cih), width = .1)
# histogram
# temporary data d2
d2 <- d1
# look at correctness of responses
# Reverse-score only the Lfeel rows of impolite trials. The earlier version
# also applied 1 - x to the impolite niceness ratings (0-5 scale), leaving
# meaningless values in d2 even though only Lfeel rows are plotted.
d2$answer <- ifelse(
  d2$polite == "impolite" & d2$q_kind == "Lfeel",
  1 - d2$answer,
  d2$answer
)
qplot(as.factor(answer),
  fill = polite,
  geom = "histogram",
  data = subset(d2, !is.na(answer) & q_kind == "Lfeel")
) +
  facet_grid(polite ~ age_cat)
# ---- Second pass: reload the raw data, keeping only niceness and Lfeel ----
d <- read.csv("/Users/ericang/Documents/Research/Politeness/trupol_git/data_analysis/data/trupol_data.csv")
log <- read.csv("/Users/ericang/Documents/Research/Politeness/trupol_git/data_analysis/info/trupol_subj.csv")
# join with subj log
# The key is spelled out so the join cannot silently pick up extra shared
# columns; the original run reported "Joining by: subid".
d <- join(d, log, by = "subid")
# select key vars
d <- d %>%
  select(
    subid, Age,
    trial1_niceness, trial2_niceness, trial3_niceness, trial4_niceness,
    trial1_Lfeel_val, trial2_Lfeel_val, trial3_Lfeel_val, trial4_Lfeel_val
  )
# categorize age
d$Age <- as.numeric(as.character(d$Age))
# Left-closed bins [3,4), [4,5), [5,6), [6,7): a child aged exactly 4.0 is
# labelled "4". The default right-closed bins would label that child "3" and
# turn an age of exactly 3.0 into NA.
d$age_cat <- cut(d$Age,
  breaks = c(3, 4, 5, 6, 7),
  right = FALSE,
  labels = c("3", "4", "5", "6")
)
# reshape data
# Columns 3:10 are the eight response variables; age_cat (column 11) stays as
# an id column.
d <- d %>%
  gather("q", "answer", 3:10)
## Warning: attributes are not identical across measure variables; they will
## be dropped
# add columns to categorize vars
# Question names look like "trial<k>_<kind>": character 6 is the trial number
# and characters 8 onward are the question kind ("niceness" or "Lfeel_val").
# Spreading on q_kind puts each trial's two answers side by side.
d1 <- d %>%
  mutate(
    trial = substring(q, 6, 6),
    q_kind = substring(q, 8, 16)
  ) %>%
  select(subid, age_cat, trial, q_kind, answer) %>%
  spread(q_kind, answer) %>%
  mutate(polite = factor(trial,
    levels = c("1", "2", "3", "4"),
    labels = c("impolite", "polite", "polite", "impolite")
  ))
## Warning: duplicated levels in factors are deprecated
# Lfeel_val becomes a factor for the x axis; niceness stays numeric for y.
# Unparseable entries turn into NA.
d1$Lfeel_val <- as.factor(as.numeric(as.character(d1$Lfeel_val)))
d1$niceness <- as.numeric(as.character(d1$niceness))
## Warning: NAs introduced by coercion
d1$polite <- as.factor(as.character(d1$polite))
# Rows with a missing Lfeel_val are dropped with is.na() rather than by
# comparing the factor against the string "NA".
ggplot(subset(d1, !is.na(Lfeel_val)), aes(x = Lfeel_val, y = niceness)) +
  geom_jitter(
    position = position_jitter(height = .1, width = .3),
    aes(colour = niceness)
  )
# facet by politeness
ggplot(subset(d1, !is.na(Lfeel_val)), aes(x = Lfeel_val, y = niceness)) +
  geom_jitter(
    position = position_jitter(height = .1, width = .3),
    aes(colour = niceness)
  ) +
  facet_grid(. ~ polite)