# trupol_ana_preprocess

rm(list=ls())
library(ggplot2)
library(plyr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.1.2
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.1.2
source("/Users/ericang/Documents/Research/Politeness/trupol_git/data_analysis/helper/useful.R")
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## Loading required package: Rcpp
## Warning: package 'reshape2' was built under R version 3.1.2
# ---- Load and preprocess: long format, one row per child x question ----
# Raw responses and the subject log both live under the analysis directory.
analysis_dir <- file.path("/Users/ericang/Documents/Research/Politeness",
                          "trupol_git", "data_analysis")
d <- read.csv(file.path(analysis_dir, "data", "trupol_data.csv"))
# NOTE(review): `log` shadows base::log(); the name is kept because code
# elsewhere in the file may still refer to it.
log <- read.csv(file.path(analysis_dir, "info", "trupol_subj.csv"))

# join with subj log (plyr::join, left join); the key is spelled out instead
# of relying on the "Joining by: subid" auto-detection
d <- join(d, log, by = "subid")

# select key vars
d <- d %>%
  select(subid, Age,
         trial1_2_evalCorrect, trial1_2_playCorrect,
         trial3_4_playCorrect, trial3_4_evalCorrect,
         trial1_niceness, trial2_niceness,
         trial3_niceness, trial4_niceness,
         trial1_Lfeel_val, trial2_Lfeel_val,
         trial3_Lfeel_val, trial4_Lfeel_val)

# categorize age
# Bins are left-closed, [3,4) [4,5) [5,6) [6,7), so a child aged exactly 4.0
# is labelled "4". With cut()'s default right-closed bins that child was
# labelled "3" and a child aged exactly 3.0 fell out as NA.
# Ages outside [3, 7) become NA.
d$Age <- as.numeric(as.character(d$Age))
d$age_cat <- cut(d$Age, breaks = 3:7, right = FALSE,
                 labels = c("3", "4", "5", "6"))
summary(d$age_cat)

# reshape data: columns 3:14 are the twelve trial* columns selected above
# (age_cat was appended as column 15). gather() warns that attributes differ
# across the measure columns and drops them.
d <- d %>%
  gather("q", "answer", 3:14)

# add columns to categorize vars
# Named lookup vectors replace factor(levels =, labels =) with repeated
# labels, which raised "duplicated levels in factors are deprecated".
# Keys are characters 1-8 of the question name.
polite_by_prefix <- c(
  trial1_2 = "NA",       trial3_4 = "NA",
  trial1_n = "impolite", trial2_n = "polite",
  trial3_n = "polite",   trial4_n = "impolite",
  trial1_L = "impolite", trial2_L = "polite",
  trial3_L = "polite",   trial4_L = "impolite"
)
# Keys are characters 8-10 of the question name.
kind_by_code <- c(
  "2_e" = "eval", "2_p" = "play", "4_p" = "play", "4_e" = "eval",
  "nic" = "niceness", "Lfe" = "Lfeel"
)

# eval/play rows get the literal string "NA" as their `polite` level (not a
# missing value), exactly as before.
d1 <- d %>%
  mutate(
    polite = factor(unname(polite_by_prefix[substring(as.character(q), 1, 8)])),
    q_kind = factor(unname(kind_by_code[substring(as.character(q), 8, 10)]))
  )

# Recode the raw answers to numbers.
# NOTE(review): the relabelling below is positional. It assumes the raw
# answers have exactly these ten levels in sorted order (two missing-value
# codes, the digits 0-5, then two codes mapped to 0 and 1). The raw values
# are not visible here; confirm against the data before trusting it.
d1$answer <- as.factor(as.character(d1$answer))
levels(d1$answer) <- c("NA", "NA", "0", "1", "2", "3", "4", "5", "0", "1")
# The "NA" strings become real NAs here, hence the expected
# "NAs introduced by coercion" warning.
d1$answer <- as.numeric(as.character(d1$answer))

correct responses on eval and play questions

play: “Who do you want to play with?”; eval: “Whose snack do you think is tastier (given the same utterance)?”

# plot: eval and play
# Per-child mean answer for the play/eval questions; the histogram chunk
# notes 1 = correct, so this reads as proportion correct.
mss <- d1 %>%
  filter(q_kind %in% c("play", "eval")) %>%
  group_by(q_kind, age_cat, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE)) %>%
  ungroup()

# Cell means across children, plus interval half-widths.
# NOTE(review): ci.high / ci.low come from helper/useful.R and are presumably
# distances from the mean to the upper/lower confidence limit -- confirm.
ms <- aggregate(answer ~ q_kind + age_cat, mss, mean)
ms$cih <- aggregate(answer ~ q_kind + age_cat, mss, ci.high)$answer
ms$cil <- aggregate(answer ~ q_kind + age_cat, mss, ci.low)$answer

# Built with ggplot() rather than qplot(stat =, position =), which current
# ggplot2 no longer accepts. `width` is a constant, so it sits outside aes().
ggplot(subset(ms, !is.na(answer)),
       aes(x = age_cat, y = answer, fill = age_cat)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~q_kind) +
  geom_errorbar(aes(ymin = answer - cil, ymax = answer + cih), width = .1)

plot of chunk unnamed-chunk-2

# histogram
# Counts of each answer (1 = correct) by age group and question kind.
# geom_bar() counts a discrete x; geom = "histogram" on a factor fails in
# current ggplot2 because its binning stat requires a continuous variable.
ggplot(subset(d1, !is.na(answer) & q_kind %in% c("play", "eval")),
       aes(x = as.factor(answer), fill = age_cat)) +
  geom_bar() +
  facet_grid(age_cat ~ q_kind)

plot of chunk unnamed-chunk-2

niceness rating

“How nice is he/she?”

# plot: niceness
# Per-child mean niceness rating within each politeness condition.
mss <- d1 %>%
  filter(q_kind == "niceness") %>%
  group_by(polite, age_cat, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE)) %>%
  ungroup()

# Cell means across children, plus interval half-widths.
# NOTE(review): ci.high / ci.low come from helper/useful.R and are presumably
# distances from the mean to the upper/lower confidence limit -- confirm.
ms <- aggregate(answer ~ polite + age_cat, mss, mean)
ms$cih <- aggregate(answer ~ polite + age_cat, mss, ci.high)$answer
ms$cil <- aggregate(answer ~ polite + age_cat, mss, ci.low)$answer

# Built with ggplot() rather than qplot(stat =, position =), which current
# ggplot2 no longer accepts. `width` is a constant, so it sits outside aes().
ggplot(subset(ms, !is.na(answer)),
       aes(x = polite, y = answer, fill = polite)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~age_cat) +
  geom_errorbar(aes(ymin = answer - cil, ymax = answer + cih), width = .1)

plot of chunk unnamed-chunk-3

# histogram
# Counts of each niceness rating by politeness condition and age group.
# geom_bar() counts a discrete x; geom = "histogram" on a factor fails in
# current ggplot2 because its binning stat requires a continuous variable.
ggplot(subset(d1, !is.na(answer) & q_kind == "niceness"),
       aes(x = as.factor(answer), fill = polite)) +
  geom_bar() +
  facet_grid(polite ~ age_cat)

plot of chunk unnamed-chunk-3

listener’s feeling

“How did he/she feel (after hearing the speaker’s utterance)?”

# plot: listener feeling inference
# Per-child mean answer to the listener-feeling question within each
# politeness condition.
mss <- d1 %>%
  filter(q_kind == "Lfeel") %>%
  group_by(polite, age_cat, subid) %>%
  summarize(answer = mean(answer, na.rm = TRUE)) %>%
  ungroup()

# look at correctness of responses
# Impolite trials are reverse-scored (answer -> 1 - answer) so both
# conditions are on the same "correct" scale. Plain vector assignment
# replaces the row-subset `df[mask, ]$answer <- ...` form, which is fragile
# on a grouped table.
impolite_rows <- which(mss$polite == "impolite")
mss$answer[impolite_rows] <- 1 - mss$answer[impolite_rows]

# Cell means across children, plus interval half-widths.
# NOTE(review): ci.high / ci.low come from helper/useful.R and are presumably
# distances from the mean to the upper/lower confidence limit -- confirm.
ms <- aggregate(answer ~ polite + age_cat, mss, mean)
ms$cih <- aggregate(answer ~ polite + age_cat, mss, ci.high)$answer
ms$cil <- aggregate(answer ~ polite + age_cat, mss, ci.low)$answer

# Built with ggplot() rather than qplot(stat =, position =), which current
# ggplot2 no longer accepts. `width` is a constant, so it sits outside aes().
ggplot(subset(ms, !is.na(answer)),
       aes(x = polite, y = answer, fill = polite)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~age_cat) +
  geom_errorbar(aes(ymin = answer - cil, ymax = answer + cih), width = .1)

plot of chunk unnamed-chunk-4

# histogram
# temporary data d2
d2 <- d1
# look at correctness of responses
# Reverse-score every impolite row (answer -> 1 - answer). This also touches
# the impolite niceness rows in d2, but only the Lfeel rows are plotted.
# `answer` is already numeric in d1, so no re-coercion is needed.
impolite_rows <- which(d2$polite == "impolite")
d2$answer[impolite_rows] <- 1 - d2$answer[impolite_rows]

# geom_bar() counts a discrete x; geom = "histogram" on a factor fails in
# current ggplot2 because its binning stat requires a continuous variable.
ggplot(subset(d2, !is.na(answer) & q_kind == "Lfeel"),
       aes(x = as.factor(answer), fill = polite)) +
  geom_bar() +
  facet_grid(polite ~ age_cat)

plot of chunk unnamed-chunk-4

niceness x listener’s feeling

# ---- Reload and reshape: one row per child x trial ----
# Starts again from the raw files so niceness and listener-feeling answers
# can sit side by side in separate columns.
analysis_dir <- file.path("/Users/ericang/Documents/Research/Politeness",
                          "trupol_git", "data_analysis")
d <- read.csv(file.path(analysis_dir, "data", "trupol_data.csv"))
# NOTE(review): `log` shadows base::log(); the name is kept because code
# elsewhere in the file may still refer to it.
log <- read.csv(file.path(analysis_dir, "info", "trupol_subj.csv"))

# join with subj log (plyr::join, left join); the key is spelled out instead
# of relying on the "Joining by: subid" auto-detection
d <- join(d, log, by = "subid")

# select key vars
d <- d %>%
  select(subid, Age,
         trial1_niceness, trial2_niceness,
         trial3_niceness, trial4_niceness,
         trial1_Lfeel_val, trial2_Lfeel_val,
         trial3_Lfeel_val, trial4_Lfeel_val)

# categorize age
# Bins are left-closed, [3,4) [4,5) [5,6) [6,7), so a child aged exactly 4.0
# is labelled "4". With cut()'s default right-closed bins that child was
# labelled "3" and a child aged exactly 3.0 fell out as NA.
# Ages outside [3, 7) become NA.
d$Age <- as.numeric(as.character(d$Age))
d$age_cat <- cut(d$Age, breaks = 3:7, right = FALSE,
                 labels = c("3", "4", "5", "6"))

# reshape data: columns 3:10 are the eight trial* columns selected above.
# gather() warns that attributes differ across the measure columns and
# drops them.
d <- d %>%
  gather("q", "answer", 3:10)

# add columns to categorize vars
# A named lookup replaces factor(levels =, labels =) with repeated labels,
# which raised "duplicated levels in factors are deprecated".
polite_by_trial <- c("1" = "impolite", "2" = "polite",
                     "3" = "polite",   "4" = "impolite")

# Character 6 of the question name is the trial number; characters 8-16 are
# the question kind ("niceness" or "Lfeel_val"), which spread() turns into
# one column each.
d1 <- d %>%
  mutate(trial = substring(q, 6, 6), q_kind = substring(q, 8, 16)) %>%
  select(subid, age_cat, trial, q_kind, answer) %>%
  spread(q_kind, answer) %>%
  mutate(polite = factor(unname(polite_by_trial[as.character(trial)])))

# Lfeel_val stays categorical for the x axis; niceness becomes numeric.
# Non-numeric raw answers turn into NA here, hence the expected
# "NAs introduced by coercion" warning.
d1$Lfeel_val <- as.factor(as.numeric(as.character(d1$Lfeel_val)))
d1$niceness <- as.numeric(as.character(d1$niceness))

# Niceness rating against listener-feeling answer, jittered so overlapping
# discrete responses stay visible. Rows with a missing Lfeel_val are dropped
# with is.na() instead of a comparison against the string "NA".
ggplot(subset(d1, !is.na(Lfeel_val)), aes(x = Lfeel_val, y = niceness)) +
  geom_jitter(position = position_jitter(height = .1, width = .3),
              aes(colour = niceness))

plot of chunk unnamed-chunk-5

# facet by politeness
# Same jittered scatter, one panel per politeness condition. Rows with a
# missing Lfeel_val are dropped with is.na() instead of a comparison against
# the string "NA".
ggplot(subset(d1, !is.na(Lfeel_val)), aes(x = Lfeel_val, y = niceness)) +
  geom_jitter(position = position_jitter(height = .1, width = .3),
              aes(colour = niceness)) +
  facet_grid(. ~ polite)

plot of chunk unnamed-chunk-5