loading and cleaning data
# preprocess
# rm(list=ls())
library(ggplot2)
library(tidyverse)
## ── Attaching packages ──────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 1.4.2 ✔ purrr 0.2.4
## ✔ tidyr 0.7.2 ✔ dplyr 0.7.4
## ✔ readr 1.1.1 ✔ stringr 1.3.1
## ✔ tibble 1.4.2 ✔ forcats 0.2.0
## Warning: package 'stringr' was built under R version 3.4.4
## ── Conflicts ─────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(langcog)
##
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
##
## scale
library(ggthemes)
##
## Attaching package: 'ggthemes'
## The following objects are masked from 'package:langcog':
##
## scale_color_solarized, scale_colour_solarized,
## scale_fill_solarized
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
library(here)
## here() starts at /Users/ejyoon/Documents/Documents/Research/polcon_local/03_data_analysis/scripts
library(brms)
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 3.4.4
## Loading 'brms' package (version 2.0.1). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## Run theme_set(theme_default()) to use the default bayesplot theme.
##
## Attaching package: 'brms'
## The following object is masked from 'package:lme4':
##
## ngrps
d1 <- read.csv(here("../data/processed_data/polcon_v3.csv")) %>%
gather(var, question_type, qtype1:qtype2) %>%
mutate(trial = trial_num) %>%
mutate(var = substr(var, 6, 6)) %>%
select(subid, order, var, trial, question_type)
## Warning: attributes are not identical across measure variables;
## they will be dropped
d2 <- read.csv(here("../data/processed_data/polcon_v3.csv")) %>%
gather(var, correct, correct1:correct2) %>%
mutate(trial = trial_num) %>%
mutate(var = substr(var, 8, 8)) %>%
select(subid, order, trial, var, correct)
d1 <- left_join(d1, d2) %>%
mutate(trial = as.factor(as.character(trial))) %>%
mutate(question_type = as.factor(ifelse(order == "2" & var == "2" & trial == "5", "comp", question_type)))
## Joining, by = c("subid", "order", "var", "trial")
# order sheet with trial info
order <- read.csv(here("../data/info/v1/v1_order.csv")) %>%
mutate(trial = substr(trial, 6,9)) %>%
separate(trial, c("trial", "var"))
d1_1 <- left_join(filter(d1, order == "1"),
select(order, trial, var, request_type_ord1)) %>%
mutate(request_type = request_type_ord1) %>%
select(subid, order, trial, request_type,question_type, correct)
## Joining, by = c("var", "trial")
## Warning: Column `trial` joining factor and character vector, coercing into
## character vector
d1_2 <- left_join(filter(d1, order == "2"),
select(order, trial, var, request_type_ord2)) %>%
mutate(request_type = request_type_ord2) %>%
select(subid, order, trial, request_type,question_type, correct)
## Joining, by = c("var", "trial")
## Warning: Column `trial` joining factor and character vector, coercing into
## character vector
d1 <- rbind(d1_1, d1_2)
# mutate(valence = ifelse(question_type %in% c("polite","nice"),
# "positive", "negative"),
# adjective = ifelse(question_type %in% c("nice","mean"), "easy","hard"))
# log sheet with subj info
log <- read.csv(here("../data/info/v3/subject_log_v3.csv"))
d1 <- left_join(d1, log)
## Joining, by = "subid"
## Warning: Column `subid` joining factors with different levels, coercing to
## character vector
# filter by exclusion criteria
d1 <- d1 %>%
filter(keepdrop != "drop", age >= 3 | age <= 4)
# make factors
d1 <- d1 %>%
mutate(subid = as.factor(subid),
order = as.factor(order),
question_type = as.factor(question_type),
gender = as.factor(gender))
# how many before filtering by practice?
subj <- d1 %>%
distinct(subid, .keep_all=TRUE) %>%
mutate(age_fct = floor(age)) %>%
group_by(age_fct) %>%
summarise(count = n())
subj
## # A tibble: 2 x 2
## age_fct count
## <dbl> <int>
## 1 3 22
## 2 4 27
# drop those who failed practice trials
subj_failed_practice <- filter(d1, (trial == "0" & correct == "0"))
d1 <- d1 %>%
filter(!subid %in% subj_failed_practice$subid) %>%
filter(trial != "0")
# change question_type
# v3 had just polite vs. rude, but this was not reflected in the logging system
d1 <- d1 %>%
mutate(question_type = fct_recode(question_type,
"polite" = "nice",
"rude" = "mean")) %>%
# make age grouping
mutate(age_fct = floor(age)) %>%
filter(question_type != "comp") %>%
select(-keepdrop, -initials, -dot, -dob)
write.csv(d1,paste("../data/processed_data/polcon_v3_ready_for_ana.csv",sep=""),
row.names=FALSE)
subj <- d1 %>%
distinct(subid, .keep_all=TRUE) %>%
group_by(age_fct) %>%
summarise(count = n())
subj
## # A tibble: 2 x 2
## age_fct count
## <dbl> <int>
## 1 3 21
## 2 4 25
d1 %>%
distinct(subid, .keep_all=TRUE) %>%
group_by(age) %>%
ggplot(., aes(x=age)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
d1 <- d1 %>%
mutate(question_type = fct_relevel(question_type, "polite"))
ms <- d1 %>%
mutate(correct = as.numeric(as.character(correct))) %>%
group_by(age_fct, question_type, subid) %>%
summarize(
correct = mean(correct, na.rm=TRUE)
) %>%
group_by(age_fct, question_type) %>%
multi_boot_standard(col = "correct", na.rm=TRUE) %>%
ungroup() %>%
mutate(correct = mean,
age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")))
p <- ggplot(subset(ms, correct!="NA" & (question_type != "play with" & question_type != "compliance")),
aes(x=age_fct, y=correct, fill=age_fct))
p +
geom_bar(position=position_dodge(), stat = "identity") +
# geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
facet_grid(.~question_type) +
xlab("Age") +
ylab("Proportion correct") +
ggtitle("Correct answers on questions \"Who was being more _____ ?\"") +
scale_fill_solarized(guide = "none") +
geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
geom_hline(yintercept=.50,lty=4) +
theme_few()
ms <- d1 %>%
filter(question_type != "comp"& question_type != "play") %>%
mutate(correct = as.numeric(as.character(correct))) %>%
group_by(age_fct, request_type, subid) %>%
summarize(
correct = mean(correct, na.rm=TRUE)
) %>%
group_by(age_fct, request_type) %>%
multi_boot_standard(col = "correct", na.rm=TRUE) %>%
ungroup() %>%
mutate(correct = mean,
age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")),
request_type = recode(request_type,
idq = "can you~",
please_idq = "can you please~"))
p <- ggplot(subset(ms, correct!="NA"),
aes(x=age_fct, y=correct, fill=age_fct))
p +
geom_bar(position=position_dodge(), stat = "identity") +
# geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
facet_grid(.~request_type) +
xlab("Age") +
ylab("Proportion correct") +
ggtitle("Correct answers on questions \"Who was being more _____ ?\"") +
scale_fill_solarized(guide = "none") +
geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
geom_hline(yintercept=.50,lty=4) +
theme_few()
ms <- d1 %>%
filter(question_type != "play" & question_type != "comp") %>%
mutate(correct = as.numeric(as.character(correct))) %>%
group_by(age_fct, question_type, request_type, subid) %>%
summarize(
correct = mean(correct, na.rm=TRUE)
) %>%
group_by(age_fct, question_type, request_type) %>%
multi_boot_standard(col = "correct", na.rm=TRUE) %>%
ungroup() %>%
mutate(correct = mean,
# question_type = factor(question_type, levels=c("polite", "nice", "rude", "mean", "play", "comp"))) %>%
# mutate(question_type = factor(question_type, labels=c("polite", "nice", "rude", "mean", "play with", "compliance")),
age_fct = factor(age_fct, labels=c("3-yr-olds", "4-yr-olds")),
request_type = recode(request_type,
idq = "can you~",
please_idq = "can you please~"))
p <- ggplot(filter(ms, !is.na(correct)),
aes(x=age_fct, y=correct, fill=age_fct))
p +
geom_bar(position=position_dodge(), stat = "identity") +
# geom_jitter(data=d1, aes(x=request_type, y=correct, fill=request_type)) +
facet_grid(question_type~request_type) +
xlab("Age") +
ylab("Proportion correct") +
ggtitle("Correct answers on questions \"Who was being more _____ ?\"") +
scale_fill_solarized(guide = "none") +
geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
geom_hline(yintercept=.50,lty=4) +
theme_few()
each point represents a participant.
p <- ggplot(d1 %>% filter(correct!="NA") %>%
mutate(request_type = recode(request_type,
idq = "can you~",
please_idq = "can you please~")),
aes(x=age, y=correct, col=age))
p +
# geom_violin(position = position_dodge(width = 1), scale = "width", size = 1,
# draw_quantiles = c(0.5), alpha = 0.3) +
geom_jitter() +
geom_smooth() +
# geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
facet_grid(question_type~request_type) +
xlab("Age") +
ylab("Proportion correct") +
ggtitle("Correct answers on questions \"Who was being more _____ ?\"") +
scale_fill_solarized(guide = "none") +
# geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
geom_hline(yintercept=.50,lty=4) +
theme_few()
## `geom_smooth()` using method = 'loess'
# stats
# brm_p <- brm(data=d1 %>% mutate(question_type = fct_relevel(question_type, "polite")) %>%
# mutate(age = scale(age)), family = "bernoulli", correct ~ age * request_type * question_type + (request_type + question_type | subid))
# save(brm_p, file=here("../scripts/polcon_brm_agescale.Rds"))
load(here("../scripts/polcon_brm_agescale.Rds"))
summary(brm_p)
## Family: bernoulli
## Links: mu = logit
## Formula: correct ~ age * request_type * question_type + (request_type + question_type | subid)
## Data: d1 %>% mutate(question_type = fct_relevel(question (Number of observations: 548)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
## ICs: LOO = NA; WAIC = NA; R2 = NA
##
## Group-Level Effects:
## ~subid (Number of levels: 46)
## Estimate Est.Error l-95% CI
## sd(Intercept) 0.68 0.32 0.07
## sd(request_typeplease) 0.68 0.45 0.03
## sd(request_typeplease_idq) 1.31 0.59 0.16
## sd(question_typerude) 1.21 0.42 0.38
## cor(Intercept,request_typeplease) 0.12 0.41 -0.69
## cor(Intercept,request_typeplease_idq) 0.34 0.36 -0.45
## cor(request_typeplease,request_typeplease_idq) 0.15 0.40 -0.66
## cor(Intercept,question_typerude) -0.31 0.35 -0.84
## cor(request_typeplease,question_typerude) -0.19 0.41 -0.85
## cor(request_typeplease_idq,question_typerude) -0.08 0.36 -0.74
## u-95% CI Eff.Sample Rhat
## sd(Intercept) 1.34 648 1.01
## sd(request_typeplease) 1.66 847 1.00
## sd(request_typeplease_idq) 2.56 798 1.01
## sd(question_typerude) 2.03 862 1.00
## cor(Intercept,request_typeplease) 0.83 2037 1.00
## cor(Intercept,request_typeplease_idq) 0.89 767 1.00
## cor(request_typeplease,request_typeplease_idq) 0.82 1093 1.00
## cor(Intercept,question_typerude) 0.51 895 1.00
## cor(request_typeplease,question_typerude) 0.67 604 1.00
## cor(request_typeplease_idq,question_typerude) 0.64 1007 1.01
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI
## Intercept 0.40 0.25 -0.09
## age 0.24 0.25 -0.26
## request_typeplease 1.18 0.41 0.41
## request_typeplease_idq 1.71 0.56 0.74
## question_typerude 0.73 0.40 -0.03
## age:request_typeplease -0.00 0.37 -0.75
## age:request_typeplease_idq 1.37 0.57 0.35
## age:question_typerude -0.23 0.40 -1.02
## request_typeplease:question_typerude -1.78 0.53 -2.84
## request_typeplease_idq:question_typerude -1.31 0.61 -2.51
## age:request_typeplease:question_typerude 1.06 0.53 0.01
## age:request_typeplease_idq:question_typerude -0.45 0.64 -1.73
## u-95% CI Eff.Sample Rhat
## Intercept 0.92 3249 1.00
## age 0.74 2394 1.00
## request_typeplease 2.03 2465 1.00
## request_typeplease_idq 2.94 1684 1.00
## question_typerude 1.50 2464 1.00
## age:request_typeplease 0.73 2562 1.00
## age:request_typeplease_idq 2.62 1805 1.00
## age:question_typerude 0.53 2275 1.00
## request_typeplease:question_typerude -0.79 2376 1.00
## request_typeplease_idq:question_typerude -0.15 2459 1.00
## age:request_typeplease:question_typerude 2.11 2578 1.00
## age:request_typeplease_idq:question_typerude 0.74 2148 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
glm_p <- glmer(data=d1 %>% mutate(question_type = fct_relevel(question_type, "polite")) %>%
mutate(age = scale(age)), family = "binomial", correct ~ age * request_type * question_type + (1 | subid))
summary(glm_p)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: correct ~ age * request_type * question_type + (1 | subid)
## Data:
## d1 %>% mutate(question_type = fct_relevel(question_type, "polite")) %>%
## mutate(age = scale(age))
##
## AIC BIC logLik deviance df.resid
## 637.5 693.5 -305.8 611.5 535
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8676 -0.7956 0.4319 0.6222 3.0943
##
## Random effects:
## Groups Name Variance Std.Dev.
## subid (Intercept) 0.435 0.6596
## Number of obs: 548, groups: subid, 46
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) 0.38813 0.24360 1.593
## age 0.23151 0.24378 0.950
## request_typeplease 1.01057 0.34638 2.917
## request_typeplease_idq 1.20513 0.38659 3.117
## question_typerude 0.60818 0.32864 1.851
## age:request_typeplease -0.02038 0.33800 -0.060
## age:request_typeplease_idq 0.93664 0.39298 2.383
## age:question_typerude -0.24581 0.32421 -0.758
## request_typeplease:question_typerude -1.56831 0.48799 -3.214
## request_typeplease_idq:question_typerude -1.12285 0.52043 -2.158
## age:request_typeplease:question_typerude 0.93555 0.49022 1.908
## age:request_typeplease_idq:question_typerude -0.29345 0.52211 -0.562
## Pr(>|z|)
## (Intercept) 0.11109
## age 0.34228
## request_typeplease 0.00353 **
## request_typeplease_idq 0.00182 **
## question_typerude 0.06422 .
## age:request_typeplease 0.95192
## age:request_typeplease_idq 0.01715 *
## age:question_typerude 0.44835
## request_typeplease:question_typerude 0.00131 **
## request_typeplease_idq:question_typerude 0.03096 *
## age:request_typeplease:question_typerude 0.05634 .
## age:request_typeplease_idq:question_typerude 0.57408
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) age rqst_t rqst__ qstn_t ag:rq_ ag:r__ ag:qs_ rqs_:_
## age 0.010
## rqst_typpls -0.583 -0.003
## rqst_typpl_ -0.523 -0.004 0.376
## qustn_typrd -0.617 -0.005 0.442 0.393
## ag:rqst_typ -0.006 -0.605 0.057 0.006 0.004
## ag:rqst_ty_ -0.004 -0.518 0.007 0.291 0.005 0.377
## ag:qstn_typ -0.008 -0.632 0.002 0.004 0.005 0.457 0.390
## rqst_typp:_ 0.411 0.001 -0.714 -0.267 -0.678 -0.038 -0.003 0.000
## rqst_typ_:_ 0.389 0.003 -0.279 -0.741 -0.631 -0.003 -0.215 -0.003 0.427
## ag:rqst_t:_ 0.008 0.420 -0.032 -0.002 0.001 -0.691 -0.258 -0.664 0.041
## ag:rqst__:_ 0.005 0.392 -0.001 -0.217 -0.003 -0.284 -0.749 -0.621 -0.001
## rq__:_ ag:_:_
## age
## rqst_typpls
## rqst_typpl_
## qustn_typrd
## ag:rqst_typ
## ag:rqst_ty_
## ag:qstn_typ
## rqst_typp:_
## rqst_typ_:_
## ag:rqst_t:_ 0.000
## ag:rqst__:_ 0.202 0.412