# preprocess
# rm(list=ls())
library(ggplot2)
library(tidyverse)

## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr

## Conflicts with tidy packages ----------------------------------------------

## filter(): dplyr, stats
## lag():    dplyr, stats

library(langcog)

## 
## Attaching package: 'langcog'

## The following object is masked from 'package:base':
## 
##     scale

library(ggthemes)

## 
## Attaching package: 'ggthemes'

## The following objects are masked from 'package:langcog':
## 
##     scale_color_solarized, scale_colour_solarized,
##     scale_fill_solarized

library(lme4)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following object is masked from 'package:tidyr':
## 
##     expand

d1 <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/processed_data/polcon_v2.csv") %>%
  gather(var, question_type, qtype1:qtype2) %>%
  mutate(trial = trial_num) %>%
  mutate(var = substr(var, 6, 6)) %>%
  select(subid, order, var, trial, question_type)

## Warning: attributes are not identical across measure variables; they will
## be dropped

d2 <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/processed_data/polcon_v2.csv") %>%
  gather(var, correct, correct1:correct2) %>%
  mutate(trial = trial_num) %>%
  mutate(var = substr(var, 8, 8)) %>%
  select(subid, order, trial, var, correct)

d1 <- left_join(d1, d2) %>%
  mutate(trial = as.factor(as.character(trial))) %>%
  mutate(question_type = as.factor(ifelse(order == "2" & var == "2" & trial == "5", "comp", question_type)))

## Joining, by = c("subid", "order", "var", "trial")

# order sheet with trial info
order <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/info/v1/v1_order.csv") %>%
  mutate(trial = substr(trial, 6,9)) %>%
  separate(trial, c("trial", "var"))

d1_1 <- left_join(filter(d1, order == "1"), 
                  select(order, trial, var, request_type_ord1)) %>%
  mutate(request_type = request_type_ord1) %>%
  select(subid, order, trial, request_type,question_type, correct)

## Joining, by = c("var", "trial")

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector

d1_2 <- left_join(filter(d1, order == "2"), 
                  select(order, trial, var, request_type_ord2)) %>%
  mutate(request_type = request_type_ord2) %>%
  select(subid, order, trial, request_type,question_type, correct)

## Joining, by = c("var", "trial")

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector

d1 <- rbind(d1_1, d1_2) %>%
    mutate(valence = ifelse(question_type %in% c("polite","nice"), 
                          "positive", "negative"), 
         adjective = ifelse(question_type %in% c("nice","mean"), "easy","hard")) 

# log sheet with subj info
log <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/info/v2/subject_log.csv") %>%
  select(-X)

d1 <- left_join(d1, log)

## Joining, by = "subid"

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector

# filter by exclusion criteria
d1 <- d1 %>%
  filter(keepdrop == "keep") %>%
  mutate(age_fct = factor(floor(age))) %>%
  filter(age_fct == 3 | age_fct == 4)

# make factors
d1 <- d1 %>%
  mutate(subid = as.factor(subid),
         order = as.factor(order),
         question_type = as.factor(question_type),
         gender = as.factor(gender))

# drop those who failed practice trials
subj_failed_practice <- filter(d1, (trial == "0" & correct == "0"))
d1 <- d1 %>%
  filter(!subid %in% subj_failed_practice$subid) %>%
  filter(trial != "0")

subj <- d1 %>%
    distinct(subid, .keep_all=TRUE) %>%
    group_by(age_fct) %>%
    summarise(count = n())
subj

## # A tibble: 2 × 2
##   age_fct count
##    <fctr> <int>
## 1       3    16
## 2       4    20

analysis

ms <- d1 %>%
  mutate(correct = as.numeric(as.character(correct))) %>%
  group_by(age_fct, question_type, subid) %>%
  summarize(
    correct = mean(correct, na.rm=TRUE)
  ) %>%
  group_by(age_fct, question_type) %>%
  multi_boot_standard(column = "correct", na.rm=TRUE) %>%
  ungroup() %>%
  mutate(correct = mean,
         question_type = factor(question_type, levels=c("polite", "nice", "rude", "mean", "play", "comp"))) %>%
  mutate(
         question_type = factor(question_type, labels=c("polite", "nice", "rude", "mean", "play with", "compliance")),
         age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")))

## Joining, by = c("age_fct", "question_type")

p <- ggplot(subset(ms, correct!="NA" & (question_type != "play with" & question_type != "compliance")), 
            aes(x=age_fct, y=correct, fill=age_fct))
p + 
  geom_bar(position=position_dodge(), stat = "identity") +
#   geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
  facet_grid(.~question_type) +
  xlab("Age") +
  ylab("Proportion correct") + 
  ggtitle("Correct answers on questions \"Who was being more _____ ?\"") +
  scale_fill_solarized(guide = "none") +
  geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
    geom_hline(yintercept=.50,lty=4) + 
  theme_few()

ms <- d1 %>%
  filter(question_type != "comp"& question_type != "play") %>%
  mutate(correct = as.numeric(as.character(correct))) %>%
  group_by(age_fct, request_type, subid) %>%
  summarize(
    correct = mean(correct, na.rm=TRUE)
  ) %>%
  group_by(age_fct, request_type) %>%
  multi_boot_standard(column = "correct", na.rm=TRUE) %>%
  ungroup() %>%
  mutate(correct = mean,
         age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")),
         request_type = recode(request_type, 
                               idq = "can you~",
                               please_idq = "can you please~"))

## Joining, by = c("age_fct", "request_type")

p <- ggplot(subset(ms, correct!="NA"), 
            aes(x=age_fct, y=correct, fill=age_fct))
p + 
  geom_bar(position=position_dodge(), stat = "identity") +
#   geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
  facet_grid(.~request_type) +
  xlab("Age") +
  ylab("Proportion correct") + 
  ggtitle("Correct answers on questions \"Who was being more _____ ?\"") +
  scale_fill_solarized(guide = "none") +
  geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
    geom_hline(yintercept=.50,lty=4) + 
  theme_few()

ms <- d1 %>%
  filter(question_type == "comp" | question_type == "play") %>%
  mutate(correct = as.numeric(as.character(correct))) %>%
  group_by(age_fct, question_type, subid) %>%
  summarize(
    correct = mean(correct, na.rm=TRUE)
  ) %>%
  group_by(age_fct, question_type) %>%
  multi_boot_standard(column = "correct", na.rm=TRUE) %>%
  ungroup() %>%
  mutate(correct = mean,
         age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")))

## Joining, by = c("age_fct", "question_type")

p <- ggplot(subset(ms, correct!="NA"), 
            aes(x=question_type, y=correct, fill=age_fct))
p + 
  geom_bar(position=position_dodge(), stat = "identity") +
#   geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
  # facet_grid(.~question_type) +
  xlab("Question type") +
  ylab("Proportion choosing polite speaker") + 
  ggtitle("Correct answers on questions \"Who would you rather play with?\" and \"Who [gets what they want]?\"") +
  scale_fill_solarized(guide = guide_legend(title = "Question type")) +
  geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
    geom_hline(yintercept=.50,lty=4) +
  theme_few()

ms <- d1 %>%
  filter(question_type != "play" & question_type != "comp") %>%
  mutate(correct = as.numeric(as.character(correct))) %>%
  group_by(age_fct, question_type, request_type, subid) %>%
  summarize(
    correct = mean(correct, na.rm=TRUE)
  ) %>%
  group_by(age_fct, question_type, request_type) %>%
  multi_boot_standard(column = "correct", na.rm=TRUE) %>%
  ungroup() %>%
  mutate(correct = mean,
         # question_type = factor(question_type, levels=c("polite", "nice", "rude", "mean", "play", "comp"))) %>%
  # mutate(question_type = factor(question_type, labels=c("polite", "nice", "rude", "mean", "play with", "compliance")),
         age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")),
         request_type = recode(request_type, 
                               idq = "can you~",
                               please_idq = "can you please~"))

## Joining, by = c("age_fct", "question_type", "request_type")

p <- ggplot(filter(ms, !is.na(correct)), 
            aes(x=request_type, y=correct, fill=request_type))
p + 
  geom_bar(position=position_dodge(), stat = "identity") +
  facet_grid(question_type~age_fct) +
  xlab("Age") +
  ylab("Proportion choosing polite speaker") + 
  ggtitle("Correct answers on questions \"Who was being more _____ ?\"") +
  scale_fill_solarized(guide = "none") +
  geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
  geom_hline(yintercept=.50,lty=4) +
  theme_few()

LMER

model_data <- filter(d1, question_type %in% c("nice","mean", "rude","polite")) %>%
  as_tibble %>%
  filter(!is.na(correct))

glmer_mod_full <- glmer(correct ~ age_fct * question_type + 
                          (1 | subid), 
                        data = model_data, 
                        family = "binomial")
summary(glmer_mod_full)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: correct ~ age_fct * question_type + (1 | subid)
##    Data: model_data
## 
##      AIC      BIC   logLik deviance df.resid 
##    503.8    540.4   -242.9    485.8      423 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6689 -0.9379  0.3747  0.6261  1.3471 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  subid  (Intercept) 1.009    1.004   
## Number of obs: 432, groups:  subid, 36
## 
## Fixed effects:
##                              Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                    0.7576     0.4172   1.816   0.0694 .
## age_fct4                       0.8980     0.5849   1.535   0.1247  
## question_typenice             -0.2135     0.4592  -0.465   0.6419  
## question_typepolite            0.3385     0.4727   0.716   0.4739  
## question_typerude              0.2225     0.4687   0.475   0.6349  
## age_fct4:question_typenice     0.3351     0.6691   0.501   0.6165  
## age_fct4:question_typepolite  -1.0536     0.6550  -1.608   0.1077  
## age_fct4:question_typerude    -0.9376     0.6520  -1.438   0.1504  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                   (Intr) ag_fc4 qstn_typn qstn_typp qstn_typr
## age_fct4          -0.701                                     
## qustn_typnc       -0.562  0.399                              
## qstn_typplt       -0.539  0.387  0.492                       
## qustn_typrd       -0.545  0.390  0.497     0.484             
## ag_fct4:qstn_typn  0.386 -0.567 -0.686    -0.338    -0.341   
## ag_fct4:qstn_typp  0.385 -0.584 -0.355    -0.722    -0.350   
## ag_fct4:qstn_typr  0.388 -0.587 -0.357    -0.349    -0.719   
##                   ag_fct4:qstn_typn ag_fct4:qstn_typp
## age_fct4                                             
## qustn_typnc                                          
## qstn_typplt                                          
## qustn_typrd                                          
## ag_fct4:qstn_typn                                    
## ag_fct4:qstn_typp  0.505                             
## ag_fct4:qstn_typr  0.508             0.524

glmer_mod_main <- glmer(correct ~ age_fct + valence + adjective + request_type + 
                     (1 | subid), 
                   data = model_data, 
                   family = "binomial")
summary(glmer_mod_main)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: correct ~ age_fct + valence + adjective + request_type + (1 |  
##     subid)
##    Data: model_data
## 
##      AIC      BIC   logLik deviance df.resid 
##    501.3    529.7   -243.6    487.3      425 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9575 -0.9281  0.3853  0.6087  1.3020 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  subid  (Intercept) 0.9987   0.9993  
## Number of obs: 432, groups:  subid, 36
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)  
## (Intercept)             8.277e-01  3.819e-01   2.167   0.0302 *
## age_fct4                4.635e-01  4.123e-01   1.124   0.2610  
## valencepositive        -1.517e-06  2.292e-01   0.000   1.0000  
## adjectivehard          -2.142e-01  2.294e-01  -0.933   0.3506  
## request_typeplease     -1.139e-01  2.731e-01  -0.417   0.6764  
## request_typeplease_idq  4.984e-01  2.874e-01   1.734   0.0829 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ag_fc4 vlncps adjctv rqst_t
## age_fct4    -0.582                            
## valencepstv -0.300  0.000                     
## adjectivhrd -0.311 -0.005  0.000              
## rqst_typpls -0.364 -0.002  0.000  0.002       
## rqst_typpl_ -0.338  0.010  0.000 -0.009  0.480

model_data %>%
  group_by(age_fct, valence, adjective, request_type) %>%
  summarise(mean = mean(correct, na.rm=TRUE)) %>%
  ggplot(aes(x = age_fct, y = mean, fill = adjective)) + 
  geom_bar(stat = "identity", position = "dodge") + 
  facet_grid(request_type ~ valence)

model_data <- filter(d1, question_type %in% c("comp", "play")) %>%
  as_tibble %>%
  filter(!is.na(correct))

glmer_mod_full <- glmer(correct ~ age_fct * question_type + 
                          (1 | subid), 
                        data = model_data, 
                        family = "binomial")
summary(glmer_mod_full)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: correct ~ age_fct * question_type + (1 | subid)
##    Data: model_data
## 
##      AIC      BIC   logLik deviance df.resid 
##    508.6    528.9   -249.3    498.6      427 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5162 -0.9049  0.4790  0.6029  1.2705 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  subid  (Intercept) 0.5983   0.7735  
## Number of obs: 432, groups:  subid, 36
## 
## Fixed effects:
##                            Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                 0.71003    0.29518   2.405   0.0162 *
## age_fct4                    0.67794    0.41174   1.647   0.0997 .
## question_typeplay          -0.03733    0.31715  -0.118   0.9063  
## age_fct4:question_typeplay -0.13272    0.44918  -0.296   0.7676  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ag_fc4 qstn_t
## age_fct4    -0.708              
## qstn_typply -0.523  0.376       
## ag_fct4:qs_  0.368 -0.537 -0.706

ver 1 and 2 together

##
d0 <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/processed_data/polcon_v1_data.csv") %>%
  gather(trial, answer, trial1_1:trial12_2) %>%
  select(subid, order, trial, answer)

order <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/info/v1/v1_order.csv")

d0_1 <- left_join(d0, order) %>%
  filter(order == "1") %>%
  mutate(speaker = speaker_ord1,
         request_type = request_type_ord1,
         question_type = question_type_ord1,
         correct = correct_ord1,
         polite_speaker = polite_speaker_ord1) %>%
  select(subid, order, trial, speaker, request_type,question_type, correct, polite_speaker, answer) %>%
  mutate(correct = ifelse(correct == answer, 1, 0),
         choose_polite = ifelse(polite_speaker == answer, 1, 0))

## Joining, by = "trial"

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector

d0_2 <- left_join(d0, order) %>%
  filter(order == "2") %>%
  mutate(speaker = speaker_ord2,
         request_type = request_type_ord2,
         question_type = question_type_ord2,
         correct = correct_ord2,
         polite_speaker = polite_speaker_ord2) %>%
  select(subid, order, trial, speaker, request_type,question_type, correct, polite_speaker, answer) %>%
  mutate(correct = ifelse(correct == answer, 1, 0),
         choose_polite = ifelse(polite_speaker == answer, 1, 0))

## Joining, by = "trial"

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector

d0 <- rbind(d0_1, d0_2)

log <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/info/v1/v1_processed_log.csv")

d0 <- left_join(d0, log) %>%
  mutate(age_fct = age)

## Joining, by = "subid"

d_all <- rbind(
  d0 %>% 
    select(age_fct, correct, order, question_type, request_type, subid, trial) %>%
    mutate(expt = "Study A") %>%
    mutate(question_type = factor(question_type, levels = c("polite", "nice", "rude", "mean", "play_with", "compliance")),
           question_type = factor(question_type, labels = c("polite", "nice", "rude", "mean", "play", "comp"))), 
  d1 %>% select(age_fct, correct, order, question_type, request_type, subid, trial) %>%
    mutate(expt = "Study B")
  )

ms <- d_all %>%
  mutate(correct = as.numeric(as.character(correct))) %>%
  group_by(expt, age_fct, question_type, subid) %>%
  summarize(
    correct = mean(correct, na.rm=TRUE)
  ) %>%
  group_by(expt, age_fct, question_type) %>%
  multi_boot_standard(column = "correct", na.rm=TRUE) %>%
  ungroup() %>%
  mutate(correct = mean,
         question_type = factor(question_type, levels=c("polite", "nice", "rude", "mean", "play", "comp"))) %>%
  mutate(
    question_type = factor(question_type, labels=c("polite", "nice", "rude", "mean", "play with", "compliance")),
    age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")))

## Joining, by = c("expt", "age_fct", "question_type")

p <- ggplot(subset(ms, correct!="NA" & (question_type != "play with" & question_type != "compliance")), 
            aes(x=age_fct, y=correct, fill=age_fct))
p + 
  geom_bar(position=position_dodge(), stat = "identity") +
#   geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
  facet_grid(expt~question_type) +
  xlab("Age") +
  ylab("Proportion choosing correctly") + 
  ggtitle("Correct answers on questions \"Who was being more _____ ?\"") +
  scale_fill_solarized(guide = "none")+
  geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
    geom_hline(yintercept=.50,lty=4) +
  theme_few()

p <- ggplot(subset(ms, correct!="NA" & (question_type == "play with" | question_type == "compliance")), 
            aes(x=question_type, y=correct, fill=age_fct))
p + 
  geom_bar(position=position_dodge(), stat = "identity") +
#   geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
  facet_grid(expt~.) +
  xlab("Question type") +
  ylab("Proportion choosing polite speaker") + 
  ggtitle("Correct answers on questions\n \"Who would you rather play with?\"\n and \"Who [gets what they want]?\"") +
  geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
    geom_hline(yintercept=.50,lty=4) +
  scale_fill_solarized(guide = guide_legend(title = "Question type"))+
  theme_few()

ms <- d_all %>%
  mutate(correct = as.numeric(as.character(correct))) %>%
  group_by(expt, age_fct, request_type, subid) %>%
  summarize(
    correct = mean(correct, na.rm=TRUE)
  ) %>%
  group_by(expt, age_fct, request_type) %>%
  multi_boot_standard(column = "correct", na.rm=TRUE) %>%
  ungroup() %>%
  mutate(correct = mean,
    age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")),
    request_type = recode(request_type, 
                          idq = "can you ~",
                          please_idq = "can you please ~"))

## Joining, by = c("expt", "age_fct", "request_type")

p <- ggplot(filter(ms, !is.na(correct)), 
            aes(x=request_type, y=correct, fill=age_fct))
p + 
  geom_bar(position=position_dodge(), stat = "identity") +
#   geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
  facet_grid(expt~.) +
  xlab("Markers") +
  ylab("Proportion choosing polite speaker") + 
  ggtitle("Judgments based on polite markers") +
  geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
    geom_hline(yintercept=.50,lty=4) +
  scale_fill_solarized(guide = guide_legend(title = "Markers"))+
  theme_few()

polcon v2 data

Erica Yoon

Mar 14 2017

analysis

ver 1 and 2 together