# preprocess
# rm(list=ls())
library(ggplot2)
library(tidyverse)
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(langcog)
##
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
##
## scale
library(ggthemes)
##
## Attaching package: 'ggthemes'
## The following objects are masked from 'package:langcog':
##
## scale_color_solarized, scale_colour_solarized,
## scale_fill_solarized
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
d1 <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/processed_data/polcon_v2.csv") %>%
gather(var, question_type, qtype1:qtype2) %>%
mutate(trial = trial_num) %>%
mutate(var = substr(var, 6, 6)) %>%
select(subid, order, var, trial, question_type)
## Warning: attributes are not identical across measure variables; they will
## be dropped
d2 <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/processed_data/polcon_v2.csv") %>%
gather(var, correct, correct1:correct2) %>%
mutate(trial = trial_num) %>%
mutate(var = substr(var, 8, 8)) %>%
select(subid, order, trial, var, correct)
d1 <- left_join(d1, d2) %>%
mutate(trial = as.factor(as.character(trial))) %>%
mutate(question_type = as.factor(ifelse(order == "2" & var == "2" & trial == "5", "comp", question_type)))
## Joining, by = c("subid", "order", "var", "trial")
# order sheet with trial info
order <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/info/v1/v1_order.csv") %>%
mutate(trial = substr(trial, 6,9)) %>%
separate(trial, c("trial", "var"))
d1_1 <- left_join(filter(d1, order == "1"),
select(order, trial, var, request_type_ord1)) %>%
mutate(request_type = request_type_ord1) %>%
select(subid, order, trial, request_type,question_type, correct)
## Joining, by = c("var", "trial")
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector
d1_2 <- left_join(filter(d1, order == "2"),
select(order, trial, var, request_type_ord2)) %>%
mutate(request_type = request_type_ord2) %>%
select(subid, order, trial, request_type,question_type, correct)
## Joining, by = c("var", "trial")
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector
d1 <- rbind(d1_1, d1_2) %>%
mutate(valence = ifelse(question_type %in% c("polite","nice"),
"positive", "negative"),
adjective = ifelse(question_type %in% c("nice","mean"), "easy","hard"))
# log sheet with subj info
log <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/info/v2/subject_log.csv") %>%
select(-X)
d1 <- left_join(d1, log)
## Joining, by = "subid"
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector
# filter by exclusion criteria
d1 <- d1 %>%
filter(keepdrop == "keep") %>%
mutate(age_fct = factor(floor(age))) %>%
filter(age_fct == 3 | age_fct == 4)
# make factors
d1 <- d1 %>%
mutate(subid = as.factor(subid),
order = as.factor(order),
question_type = as.factor(question_type),
gender = as.factor(gender))
# drop those who failed practice trials
subj_failed_practice <- filter(d1, (trial == "0" & correct == "0"))
d1 <- d1 %>%
filter(!subid %in% subj_failed_practice$subid) %>%
filter(trial != "0")
subj <- d1 %>%
distinct(subid, .keep_all=TRUE) %>%
group_by(age_fct) %>%
summarise(count = n())
subj
## # A tibble: 2 × 2
## age_fct count
## <fctr> <int>
## 1 3 16
## 2 4 20
ms <- d1 %>%
mutate(correct = as.numeric(as.character(correct))) %>%
group_by(age_fct, question_type, subid) %>%
summarize(
correct = mean(correct, na.rm=TRUE)
) %>%
group_by(age_fct, question_type) %>%
multi_boot_standard(column = "correct", na.rm=TRUE) %>%
ungroup() %>%
mutate(correct = mean,
question_type = factor(question_type, levels=c("polite", "nice", "rude", "mean", "play", "comp"))) %>%
mutate(
question_type = factor(question_type, labels=c("polite", "nice", "rude", "mean", "play with", "compliance")),
age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")))
## Joining, by = c("age_fct", "question_type")
p <- ggplot(subset(ms, correct!="NA" & (question_type != "play with" & question_type != "compliance")),
aes(x=age_fct, y=correct, fill=age_fct))
p +
geom_bar(position=position_dodge(), stat = "identity") +
# geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
facet_grid(.~question_type) +
xlab("Age") +
ylab("Proportion correct") +
ggtitle("Correct answers on questions \"Who was being more _____ ?\"") +
scale_fill_solarized(guide = "none") +
geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
geom_hline(yintercept=.50,lty=4) +
theme_few()
ms <- d1 %>%
filter(question_type != "comp"& question_type != "play") %>%
mutate(correct = as.numeric(as.character(correct))) %>%
group_by(age_fct, request_type, subid) %>%
summarize(
correct = mean(correct, na.rm=TRUE)
) %>%
group_by(age_fct, request_type) %>%
multi_boot_standard(column = "correct", na.rm=TRUE) %>%
ungroup() %>%
mutate(correct = mean,
age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")),
request_type = recode(request_type,
idq = "can you~",
please_idq = "can you please~"))
## Joining, by = c("age_fct", "request_type")
p <- ggplot(subset(ms, correct!="NA"),
aes(x=age_fct, y=correct, fill=age_fct))
p +
geom_bar(position=position_dodge(), stat = "identity") +
# geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
facet_grid(.~request_type) +
xlab("Age") +
ylab("Proportion correct") +
ggtitle("Correct answers on questions \"Who was being more _____ ?\"") +
scale_fill_solarized(guide = "none") +
geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
geom_hline(yintercept=.50,lty=4) +
theme_few()
ms <- d1 %>%
filter(question_type == "comp" | question_type == "play") %>%
mutate(correct = as.numeric(as.character(correct))) %>%
group_by(age_fct, question_type, subid) %>%
summarize(
correct = mean(correct, na.rm=TRUE)
) %>%
group_by(age_fct, question_type) %>%
multi_boot_standard(column = "correct", na.rm=TRUE) %>%
ungroup() %>%
mutate(correct = mean,
age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")))
## Joining, by = c("age_fct", "question_type")
p <- ggplot(subset(ms, correct!="NA"),
aes(x=question_type, y=correct, fill=age_fct))
p +
geom_bar(position=position_dodge(), stat = "identity") +
# geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
# facet_grid(.~question_type) +
xlab("Question type") +
ylab("Proportion choosing polite speaker") +
ggtitle("Correct answers on questions \"Who would you rather play with?\" and \"Who [gets what they want]?\"") +
scale_fill_solarized(guide = guide_legend(title = "Question type")) +
geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
geom_hline(yintercept=.50,lty=4) +
theme_few()
ms <- d1 %>%
filter(question_type != "play" & question_type != "comp") %>%
mutate(correct = as.numeric(as.character(correct))) %>%
group_by(age_fct, question_type, request_type, subid) %>%
summarize(
correct = mean(correct, na.rm=TRUE)
) %>%
group_by(age_fct, question_type, request_type) %>%
multi_boot_standard(column = "correct", na.rm=TRUE) %>%
ungroup() %>%
mutate(correct = mean,
# question_type = factor(question_type, levels=c("polite", "nice", "rude", "mean", "play", "comp"))) %>%
# mutate(question_type = factor(question_type, labels=c("polite", "nice", "rude", "mean", "play with", "compliance")),
age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")),
request_type = recode(request_type,
idq = "can you~",
please_idq = "can you please~"))
## Joining, by = c("age_fct", "question_type", "request_type")
p <- ggplot(filter(ms, !is.na(correct)),
aes(x=request_type, y=correct, fill=request_type))
p +
geom_bar(position=position_dodge(), stat = "identity") +
facet_grid(question_type~age_fct) +
xlab("Age") +
ylab("Proportion choosing polite speaker") +
ggtitle("Correct answers on questions \"Who was being more _____ ?\"") +
scale_fill_solarized(guide = "none") +
geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
geom_hline(yintercept=.50,lty=4) +
theme_few()
LMER
model_data <- filter(d1, question_type %in% c("nice","mean", "rude","polite")) %>%
as_tibble %>%
filter(!is.na(correct))
glmer_mod_full <- glmer(correct ~ age_fct * question_type +
(1 | subid),
data = model_data,
family = "binomial")
summary(glmer_mod_full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: correct ~ age_fct * question_type + (1 | subid)
## Data: model_data
##
## AIC BIC logLik deviance df.resid
## 503.8 540.4 -242.9 485.8 423
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6689 -0.9379 0.3747 0.6261 1.3471
##
## Random effects:
## Groups Name Variance Std.Dev.
## subid (Intercept) 1.009 1.004
## Number of obs: 432, groups: subid, 36
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7576 0.4172 1.816 0.0694 .
## age_fct4 0.8980 0.5849 1.535 0.1247
## question_typenice -0.2135 0.4592 -0.465 0.6419
## question_typepolite 0.3385 0.4727 0.716 0.4739
## question_typerude 0.2225 0.4687 0.475 0.6349
## age_fct4:question_typenice 0.3351 0.6691 0.501 0.6165
## age_fct4:question_typepolite -1.0536 0.6550 -1.608 0.1077
## age_fct4:question_typerude -0.9376 0.6520 -1.438 0.1504
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ag_fc4 qstn_typn qstn_typp qstn_typr
## age_fct4 -0.701
## qustn_typnc -0.562 0.399
## qstn_typplt -0.539 0.387 0.492
## qustn_typrd -0.545 0.390 0.497 0.484
## ag_fct4:qstn_typn 0.386 -0.567 -0.686 -0.338 -0.341
## ag_fct4:qstn_typp 0.385 -0.584 -0.355 -0.722 -0.350
## ag_fct4:qstn_typr 0.388 -0.587 -0.357 -0.349 -0.719
## ag_fct4:qstn_typn ag_fct4:qstn_typp
## age_fct4
## qustn_typnc
## qstn_typplt
## qustn_typrd
## ag_fct4:qstn_typn
## ag_fct4:qstn_typp 0.505
## ag_fct4:qstn_typr 0.508 0.524
glmer_mod_main <- glmer(correct ~ age_fct + valence + adjective + request_type +
(1 | subid),
data = model_data,
family = "binomial")
summary(glmer_mod_main)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: correct ~ age_fct + valence + adjective + request_type + (1 |
## subid)
## Data: model_data
##
## AIC BIC logLik deviance df.resid
## 501.3 529.7 -243.6 487.3 425
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9575 -0.9281 0.3853 0.6087 1.3020
##
## Random effects:
## Groups Name Variance Std.Dev.
## subid (Intercept) 0.9987 0.9993
## Number of obs: 432, groups: subid, 36
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.277e-01 3.819e-01 2.167 0.0302 *
## age_fct4 4.635e-01 4.123e-01 1.124 0.2610
## valencepositive -1.517e-06 2.292e-01 0.000 1.0000
## adjectivehard -2.142e-01 2.294e-01 -0.933 0.3506
## request_typeplease -1.139e-01 2.731e-01 -0.417 0.6764
## request_typeplease_idq 4.984e-01 2.874e-01 1.734 0.0829 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ag_fc4 vlncps adjctv rqst_t
## age_fct4 -0.582
## valencepstv -0.300 0.000
## adjectivhrd -0.311 -0.005 0.000
## rqst_typpls -0.364 -0.002 0.000 0.002
## rqst_typpl_ -0.338 0.010 0.000 -0.009 0.480
model_data %>%
group_by(age_fct, valence, adjective, request_type) %>%
summarise(mean = mean(correct, na.rm=TRUE)) %>%
ggplot(aes(x = age_fct, y = mean, fill = adjective)) +
geom_bar(stat = "identity", position = "dodge") +
facet_grid(request_type ~ valence)
model_data <- filter(d1, question_type %in% c("comp", "play")) %>%
as_tibble %>%
filter(!is.na(correct))
glmer_mod_full <- glmer(correct ~ age_fct * question_type +
(1 | subid),
data = model_data,
family = "binomial")
summary(glmer_mod_full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: correct ~ age_fct * question_type + (1 | subid)
## Data: model_data
##
## AIC BIC logLik deviance df.resid
## 508.6 528.9 -249.3 498.6 427
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5162 -0.9049 0.4790 0.6029 1.2705
##
## Random effects:
## Groups Name Variance Std.Dev.
## subid (Intercept) 0.5983 0.7735
## Number of obs: 432, groups: subid, 36
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.71003 0.29518 2.405 0.0162 *
## age_fct4 0.67794 0.41174 1.647 0.0997 .
## question_typeplay -0.03733 0.31715 -0.118 0.9063
## age_fct4:question_typeplay -0.13272 0.44918 -0.296 0.7676
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ag_fc4 qstn_t
## age_fct4 -0.708
## qstn_typply -0.523 0.376
## ag_fct4:qs_ 0.368 -0.537 -0.706
##
d0 <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/processed_data/polcon_v1_data.csv") %>%
gather(trial, answer, trial1_1:trial12_2) %>%
select(subid, order, trial, answer)
order <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/info/v1/v1_order.csv")
d0_1 <- left_join(d0, order) %>%
filter(order == "1") %>%
mutate(speaker = speaker_ord1,
request_type = request_type_ord1,
question_type = question_type_ord1,
correct = correct_ord1,
polite_speaker = polite_speaker_ord1) %>%
select(subid, order, trial, speaker, request_type,question_type, correct, polite_speaker, answer) %>%
mutate(correct = ifelse(correct == answer, 1, 0),
choose_polite = ifelse(polite_speaker == answer, 1, 0))
## Joining, by = "trial"
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
d0_2 <- left_join(d0, order) %>%
filter(order == "2") %>%
mutate(speaker = speaker_ord2,
request_type = request_type_ord2,
question_type = question_type_ord2,
correct = correct_ord2,
polite_speaker = polite_speaker_ord2) %>%
select(subid, order, trial, speaker, request_type,question_type, correct, polite_speaker, answer) %>%
mutate(correct = ifelse(correct == answer, 1, 0),
choose_polite = ifelse(polite_speaker == answer, 1, 0))
## Joining, by = "trial"
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
d0 <- rbind(d0_1, d0_2)
log <- read.csv("/Users/ericang/Documents/Research/polcon/data_analysis/data/info/v1/v1_processed_log.csv")
d0 <- left_join(d0, log) %>%
mutate(age_fct = age)
## Joining, by = "subid"
d_all <- rbind(
d0 %>%
select(age_fct, correct, order, question_type, request_type, subid, trial) %>%
mutate(expt = "Study A") %>%
mutate(question_type = factor(question_type, levels = c("polite", "nice", "rude", "mean", "play_with", "compliance")),
question_type = factor(question_type, labels = c("polite", "nice", "rude", "mean", "play", "comp"))),
d1 %>% select(age_fct, correct, order, question_type, request_type, subid, trial) %>%
mutate(expt = "Study B")
)
ms <- d_all %>%
mutate(correct = as.numeric(as.character(correct))) %>%
group_by(expt, age_fct, question_type, subid) %>%
summarize(
correct = mean(correct, na.rm=TRUE)
) %>%
group_by(expt, age_fct, question_type) %>%
multi_boot_standard(column = "correct", na.rm=TRUE) %>%
ungroup() %>%
mutate(correct = mean,
question_type = factor(question_type, levels=c("polite", "nice", "rude", "mean", "play", "comp"))) %>%
mutate(
question_type = factor(question_type, labels=c("polite", "nice", "rude", "mean", "play with", "compliance")),
age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")))
## Joining, by = c("expt", "age_fct", "question_type")
p <- ggplot(subset(ms, correct!="NA" & (question_type != "play with" & question_type != "compliance")),
aes(x=age_fct, y=correct, fill=age_fct))
p +
geom_bar(position=position_dodge(), stat = "identity") +
# geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
facet_grid(expt~question_type) +
xlab("Age") +
ylab("Proportion choosing correctly") +
ggtitle("Correct answers on questions \"Who was being more _____ ?\"") +
scale_fill_solarized(guide = "none")+
geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
geom_hline(yintercept=.50,lty=4) +
theme_few()
p <- ggplot(subset(ms, correct!="NA" & (question_type == "play with" | question_type == "compliance")),
aes(x=question_type, y=correct, fill=age_fct))
p +
geom_bar(position=position_dodge(), stat = "identity") +
# geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
facet_grid(expt~.) +
xlab("Question type") +
ylab("Proportion choosing polite speaker") +
ggtitle("Correct answers on questions\n \"Who would you rather play with?\"\n and \"Who [gets what they want]?\"") +
geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
geom_hline(yintercept=.50,lty=4) +
scale_fill_solarized(guide = guide_legend(title = "Question type"))+
theme_few()
ms <- d_all %>%
mutate(correct = as.numeric(as.character(correct))) %>%
group_by(expt, age_fct, request_type, subid) %>%
summarize(
correct = mean(correct, na.rm=TRUE)
) %>%
group_by(expt, age_fct, request_type) %>%
multi_boot_standard(column = "correct", na.rm=TRUE) %>%
ungroup() %>%
mutate(correct = mean,
age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")),
request_type = recode(request_type,
idq = "can you ~",
please_idq = "can you please ~"))
## Joining, by = c("expt", "age_fct", "request_type")
p <- ggplot(filter(ms, !is.na(correct)),
aes(x=request_type, y=correct, fill=age_fct))
p +
geom_bar(position=position_dodge(), stat = "identity") +
# geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
facet_grid(expt~.) +
xlab("Markers") +
ylab("Proportion choosing polite speaker") +
ggtitle("Judgments based on polite markers") +
geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
geom_hline(yintercept=.50,lty=4) +
scale_fill_solarized(guide = guide_legend(title = "Markers"))+
theme_few()