loading and cleaning data
d_filter_by_practice <- d_raw %>%
filter(practice1 == "Y", practice2 == "Y", practice3 == "Y",
practice4 == "Y", practice5 == "Y", practice6 == "Y")
d_clean <- d_raw %>%
filter(subid %in% d_filter_by_practice$subid) %>%
select(-practice1, -practice2, -practice3,
-practice4, -practice5, -practice6) %>%
gather(-order, -subid, key = trial, value = answer) %>%
separate(trial, c("trial", "question"), extra="merge") %>%
mutate(trial = substr(trial, 6, 6)) %>%
mutate(question = case_when(
grepl("playWhy", question) ~ "Pwhy",
grepl("play", question) ~ "play",
TRUE ~ question
)
)
order <- read_csv(here("../data/version3_replication/trupol_v3_order.csv")) %>%
mutate_if(is.numeric, as.character)
## Parsed with column specification:
## cols(
## order = col_integer(),
## trial = col_integer(),
## question = col_character(),
## qkind = col_character(),
## speaker = col_character(),
## item = col_character(),
## cond = col_character()
## )
d_join1 <- left_join(d_clean %>% mutate_if(is.numeric, as.character), order)
## Joining, by = c("order", "trial", "question")
log <- read_csv(here("../data/version3_replication/trupol_v3_subject_log.csv")) %>%
rename(subid = "Subject ID") %>%
filter(Consent == "Y", English > 3, !grepl("pilot", Comments)) %>%
select(subid, Age, Gender)
## Parsed with column specification:
## cols(
## `Subject ID` = col_character(),
## `Child Initials` = col_character(),
## Consent = col_character(),
## `Consent-Vid` = col_character(),
## `Testing Date` = col_character(),
## `Birth Date` = col_character(),
## Age = col_double(),
## Gender = col_character(),
## hispanic_latino = col_character(),
## `Ethnicity/Race` = col_character(),
## English = col_integer(),
## lang1 = col_character(),
## lang2 = col_character(),
## lang3 = col_character(),
## parent1_ed = col_integer(),
## parent2_ed = col_integer(),
## Preemie = col_character(),
## `Experimentor ` = col_character(),
## Comments = col_character()
## )
d <- left_join(log, d_join1) %>%
filter(!is.na(order))
## Joining, by = "subid"
d %>%
distinct(subid, Age) %>%
group_by(floor(Age)) %>%
summarise(n=n())
## # A tibble: 4 x 2
## `floor(Age)` n
## <dbl> <int>
## 1 5 12
## 2 6 14
## 3 7 12
## 4 8 10
d %>%
# filter(trial %in% c(1, 2, 3, 4)) %>%
filter(qkind %in% c("nice", "mean", "truth")) %>%
mutate(qkind = fct_relevel(qkind, "truth", "nice", "mean")) %>%
mutate(answer = case_when(answer == "Y" ~ 1,
answer == "IDK" ~ 0.5,
answer == "N" ~ 0)) %>%
ggplot(., aes(x=Age, y=answer, col = answer)) +
geom_jitter(height = .05) +
facet_grid(qkind~speaker*cond) +
theme_few() +
geom_hline(aes(yintercept=0.5), linetype="dashed") +
geom_smooth(method = "lm", se = FALSE, col="red", size=.5) +
scale_color_gradient(low="orange", high="purple") +
ggtitle("Judgments for honest vs. polite speaker (replication)") +
ylab("Proportion \"yes [the speaker was _____]\"") +
xlab("Age (in years)") +
ylim(-0.1,1.1) +
scale_y_continuous(breaks=seq(0,1,.5))
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
ggsave(here("age_by_cond.pdf"), width=8)
## Saving 8 x 5 in image
d %>%
filter(qkind %in% c("nice", "mean", "truth")) %>%
mutate(qkind = fct_relevel(qkind, "truth", "nice", "mean")) %>%
mutate(cond = fct_relevel(cond, "expt"),
cond = fct_recode(cond,
"dishonest for politeness" = "expt",
"dishonest for no apparent reason" = "cont")) %>%
mutate(
# speaker = fct_relevel(speaker, "polite"),
speaker = fct_recode(speaker, "dishonest" = "polite")) %>%
mutate(answer = case_when(answer == "Y" ~ 1,
answer == "IDK" ~ 0.5,
answer == "N" ~ 0)) %>%
mutate(agebin = case_when(
Age < 7 ~ "6",
Age >= 7 ~ "8"
)) %>%
group_by(agebin, cond, speaker, qkind, subid) %>%
summarise(answer = mean(answer, na.rm=TRUE)) %>%
group_by(agebin, cond, speaker, qkind) %>%
multi_boot_standard(col="answer") %>%
rename(answer = mean) %>%
ggplot(., aes(x=agebin, y=answer, fill=speaker)) +
geom_bar(position=position_dodge(), stat = "identity") +
facet_grid(qkind~cond) +
geom_errorbar(position=position_dodge(.9), aes(ymin=ci_lower,ymax=ci_upper,width=.1)) +
ggtitle("Judgments for honest vs. polite speaker (replication)") +
geom_hline(yintercept=.50,lty=4) +
xlab("Age (years)") +
ylab("Proportion \"yes [the speaker was _____]\"") +
# scale_fill_solarized(guide = guide_legend(title = "speaker")) +
scale_fill_ptol() +
theme_few() +
ylim(0, 1)
What did the original sample look like?
Experimental condition only. (this was not asked in the control)
d_Lfeel <- d %>%
filter(question == "LFeel") %>%
mutate(answer = as.numeric(as.character(case_when(
grepl(paste( "sad", "bad", sep = "|" ), answer, ignore.case=TRUE) ~ "0",
grepl(paste( "good", "happy", sep = "|" ), answer, ignore.case=TRUE) ~ "1",
TRUE ~ "NA"
)))) %>%
filter(!is.na(answer))
## Warning in evalq(as.numeric(as.character(case_when(grepl(paste("sad",
## "bad", : NAs introduced by coercion
d_Lfeel %>%
ggplot(., aes(x=Age, y=answer, color=answer)) +
geom_jitter() +
geom_smooth() +
facet_grid(.~speaker) +
geom_hline(yintercept=0.5, lty=2)
## `geom_smooth()` using method = 'loess'
where 1 = polite (dishonest) speaker, 0 = honest speaker
d %>%
# filter(trial %in% c(1, 2, 3, 4)) %>%
filter(question == "play") %>%
mutate(answer = as.numeric(as.character(case_when(
grepl(paste( "Sally", "Richard", "Bobby", "Nancy", sep = "|" ), answer, ignore.case=TRUE) ~ "0", # honest
grepl(paste( "Mary", "Stanley", "Colin", "Gabby", sep = "|" ), answer, ignore.case=TRUE) ~ "1", # polite
TRUE ~ "NA"
)))) %>%
filter(!is.na(answer)) %>%
ggplot(., aes(x=Age, y=answer, color=answer)) +
geom_jitter() +
geom_smooth() +
facet_grid(.~cond) +
geom_hline(yintercept=0.5, lty=2)
## Warning in evalq(as.numeric(as.character(case_when(grepl(paste("Sally", :
## NAs introduced by coercion
## `geom_smooth()` using method = 'loess'
d_SWhy <- d %>%
filter(question == "SWhy" | qkind == "nice" | qkind == "mean") %>%
mutate(qkind = case_when(
question == "SWhy" ~ "SWhy",
TRUE ~ qkind
)) %>%
select(-question) %>%
spread(qkind, answer) %>%
mutate(SWhy_kind = case_when(
grepl(paste("mean", "lie", "nice", "sad", "hurt", "upset", "bad", "mad", "happy", "feel", "friend", sep = "|" ), SWhy, ignore.case=TRUE) ~ "care for other",
grepl(paste("not lie", "honest", "truth",
"right", sep = "|" ), SWhy, ignore.case=TRUE) ~ "veracity",
grepl(paste("thought", "beautiful", "boring", "yucky", "ugly", "yummy", "like", "pretty", "fun", "weird", "cookie", sep = "|" ), SWhy, ignore.case=TRUE) ~ "state",
grepl(paste("don't know", sep = "|" ), SWhy, ignore.case=TRUE) ~ "IDK",
TRUE ~ "NA"
)) %>%
mutate(agebin = case_when(
Age < 7 ~ "6",
Age >= 7 ~ "8"
)) %>%
filter(SWhy_kind != "NA") %>%
mutate(nice = case_when(nice == "Y" ~ 1,
nice == "IDK" ~ 0.5,
nice == "N" ~ 0)) %>%
mutate(mean = case_when(mean == "Y" ~ 1,
mean == "IDK" ~ 0.5,
mean == "N" ~ 0))
d_SWhy %>%
group_by(speaker, SWhy_kind) %>%
multi_boot_standard(col="nice") %>%
rename(nice = mean) %>%
ggplot(., aes(x=SWhy_kind, y=nice, fill=speaker)) +
geom_bar(stat="identity", position="dodge") +
geom_linerange(aes(ymin=ci_lower, ymax=ci_upper), position=position_dodge(width=.9)) +
scale_fill_solarized() +
geom_hline(yintercept=.5, lty=2)
d_SWhy %>%
group_by(speaker, SWhy_kind) %>%
multi_boot_standard(col="mean") %>%
ggplot(., aes(x=SWhy_kind, y=mean, fill=speaker)) +
geom_bar(stat="identity", position="dodge") +
geom_linerange(aes(ymin=ci_lower, ymax=ci_upper), position=position_dodge(width=.9)) +
scale_fill_solarized() +
geom_hline(yintercept=.5, lty=2)
## Joining, by = "age"
# brm_trupol_nice <- brm(data=d %>%
# filter(qkind == "nice", (answer == "Y" | answer == "N")) %>%
# mutate(answer = case_when(answer == "Y" ~ 1,
# answer == "N" ~ 0)),
# answer ~ Age * cond * speaker + (speaker + cond | subid) + (speaker + cond | item),
# family="bernoulli", iter=3000, control = list(adapt_delta = 0.99, max_treedepth = 15))
# save(brm_trupol_nice, file=here("brm_trupol_nice.Rds"))
load(here("brm_trupol_nice.Rds"))
summary(brm_trupol_nice)
## Family: bernoulli
## Links: mu = logit
## Formula: answer ~ Age * cond * speaker + (speaker + cond | subid) + (speaker + cond | item)
## Data: d %>% filter(qkind == "nice", (answer == "Y" | ans (Number of observations: 374)
## Samples: 4 chains, each with iter = 3000; warmup = 1500; thin = 1;
## total post-warmup samples = 6000
## ICs: LOO = NA; WAIC = NA; R2 = NA
##
## Group-Level Effects:
## ~item (Number of levels: 4)
## Estimate Est.Error l-95% CI u-95% CI
## sd(Intercept) 1.56 1.64 0.06 5.96
## sd(speakerpolite) 1.83 2.11 0.06 7.27
## sd(condexpt) 2.40 2.27 0.16 8.37
## cor(Intercept,speakerpolite) -0.06 0.51 -0.90 0.86
## cor(Intercept,condexpt) -0.16 0.49 -0.92 0.79
## cor(speakerpolite,condexpt) 0.00 0.51 -0.89 0.89
## Eff.Sample Rhat
## sd(Intercept) 2768 1.00
## sd(speakerpolite) 3056 1.00
## sd(condexpt) 2959 1.00
## cor(Intercept,speakerpolite) 4792 1.00
## cor(Intercept,condexpt) 4363 1.00
## cor(speakerpolite,condexpt) 3730 1.00
##
## ~subid (Number of levels: 47)
## Estimate Est.Error l-95% CI u-95% CI
## sd(Intercept) 6.31 2.21 3.11 11.60
## sd(speakerpolite) 15.83 5.52 8.40 29.87
## sd(condexpt) 3.15 1.72 0.42 7.19
## cor(Intercept,speakerpolite) -0.38 0.19 -0.71 0.04
## cor(Intercept,condexpt) -0.49 0.35 -0.94 0.43
## cor(speakerpolite,condexpt) -0.23 0.34 -0.85 0.47
## Eff.Sample Rhat
## sd(Intercept) 827 1.01
## sd(speakerpolite) 962 1.01
## sd(condexpt) 681 1.01
## cor(Intercept,speakerpolite) 1171 1.00
## cor(Intercept,condexpt) 1706 1.00
## cor(speakerpolite,condexpt) 2333 1.00
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept 7.99 8.06 -6.66 25.96 1103
## Age -0.38 1.10 -2.77 1.82 1081
## condexpt -5.36 6.58 -20.06 6.78 1161
## speakerpolite 11.95 18.98 -21.13 54.30 871
## Age:condexpt 0.21 0.88 -1.55 2.04 1152
## Age:speakerpolite -3.02 2.80 -9.45 1.70 810
## condexpt:speakerpolite -8.97 9.03 -28.87 7.22 1439
## Age:condexpt:speakerpolite 2.32 1.46 -0.07 5.69 1180
## Rhat
## Intercept 1.00
## Age 1.00
## condexpt 1.00
## speakerpolite 1.00
## Age:condexpt 1.00
## Age:speakerpolite 1.00
## condexpt:speakerpolite 1.00
## Age:condexpt:speakerpolite 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# brm_trupol_mean <- brm(data=d %>%
# filter(qkind == "mean", (answer == "Y" | answer == "N")) %>%
# mutate(answer = case_when(answer == "Y" ~ 1,
# answer == "N" ~ 0)),
# answer ~ Age * cond * speaker + (speaker + cond | subid) + (speaker + cond | item),
# family="bernoulli", iter=3000, control = list(adapt_delta = 0.99, max_treedepth = 15))
# save(brm_trupol_mean, file=here("brm_trupol_mean.Rds"))
load(here("brm_trupol_mean.Rds"))
summary(brm_trupol_mean)
## Family: bernoulli
## Links: mu = logit
## Formula: answer ~ Age * cond * speaker + (speaker + cond | subid) + (speaker + cond | item)
## Data: d %>% filter(qkind == "mean", (answer == "Y" | ans (Number of observations: 374)
## Samples: 4 chains, each with iter = 3000; warmup = 1500; thin = 1;
## total post-warmup samples = 6000
## ICs: LOO = NA; WAIC = NA; R2 = NA
##
## Group-Level Effects:
## ~item (Number of levels: 4)
## Estimate Est.Error l-95% CI u-95% CI
## sd(Intercept) 1.46 1.62 0.05 5.63
## sd(speakerpolite) 4.25 2.88 0.97 11.62
## sd(condexpt) 2.64 2.36 0.14 8.73
## cor(Intercept,speakerpolite) -0.02 0.51 -0.89 0.89
## cor(Intercept,condexpt) -0.19 0.50 -0.93 0.81
## cor(speakerpolite,condexpt) -0.34 0.46 -0.96 0.68
## Eff.Sample Rhat
## sd(Intercept) 2018 1.00
## sd(speakerpolite) 2456 1.00
## sd(condexpt) 2311 1.00
## cor(Intercept,speakerpolite) 2330 1.00
## cor(Intercept,condexpt) 2962 1.00
## cor(speakerpolite,condexpt) 4259 1.00
##
## ~subid (Number of levels: 47)
## Estimate Est.Error l-95% CI u-95% CI
## sd(Intercept) 6.88 2.31 3.45 12.45
## sd(speakerpolite) 14.64 4.23 8.44 24.81
## sd(condexpt) 2.59 1.59 0.17 6.19
## cor(Intercept,speakerpolite) -0.59 0.18 -0.88 -0.18
## cor(Intercept,condexpt) 0.02 0.42 -0.76 0.81
## cor(speakerpolite,condexpt) -0.43 0.39 -0.95 0.53
## Eff.Sample Rhat
## sd(Intercept) 1419 1.00
## sd(speakerpolite) 1059 1.00
## sd(condexpt) 860 1.00
## cor(Intercept,speakerpolite) 901 1.00
## cor(Intercept,condexpt) 2898 1.00
## cor(speakerpolite,condexpt) 1872 1.00
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept -12.07 9.56 -33.75 4.66 1428
## Age 0.72 1.27 -1.70 3.44 1394
## condexpt 1.41 7.21 -13.70 16.13 2028
## speakerpolite 0.71 17.21 -34.68 34.58 1240
## Age:condexpt 0.33 0.97 -1.58 2.36 2005
## Age:speakerpolite 0.87 2.38 -3.64 6.02 1223
## condexpt:speakerpolite 7.72 8.73 -8.55 25.21 2009
## Age:condexpt:speakerpolite -1.76 1.27 -4.44 0.53 1853
## Rhat
## Intercept 1.00
## Age 1.00
## condexpt 1.00
## speakerpolite 1.00
## Age:condexpt 1.00
## Age:speakerpolite 1.00
## condexpt:speakerpolite 1.00
## Age:condexpt:speakerpolite 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# brm_trupol_truth <- brm(data=d %>%
# filter(qkind == "truth", (answer == "Y" | answer == "N")) %>%
# mutate(answer = case_when(answer == "Y" ~ 1,
# answer == "N" ~ 0)),
# answer ~ Age * cond * speaker + (speaker + cond | subid) + (speaker + cond | item),
# family="bernoulli", iter=3000, control = list(adapt_delta = 0.99, max_treedepth = 15))
# save(brm_trupol_truth, file=here("brm_trupol_truth.Rds"))
load(here("brm_trupol_truth.Rds"))
summary(brm_trupol_truth)
## Family: bernoulli
## Links: mu = logit
## Formula: answer ~ Age * cond * speaker + (speaker + cond | subid) + (speaker + cond | item)
## Data: d %>% filter(qkind == "truth", (answer == "Y" | an (Number of observations: 384)
## Samples: 4 chains, each with iter = 3000; warmup = 1500; thin = 1;
## total post-warmup samples = 6000
## ICs: LOO = NA; WAIC = NA; R2 = NA
##
## Group-Level Effects:
## ~item (Number of levels: 4)
## Estimate Est.Error l-95% CI u-95% CI
## sd(Intercept) 1.44 1.56 0.05 5.53
## sd(speakerpolite) 1.20 1.44 0.04 5.23
## sd(condexpt) 2.11 2.19 0.07 7.96
## cor(Intercept,speakerpolite) -0.07 0.50 -0.90 0.85
## cor(Intercept,condexpt) -0.26 0.50 -0.95 0.79
## cor(speakerpolite,condexpt) -0.01 0.51 -0.89 0.88
## Eff.Sample Rhat
## sd(Intercept) 2914 1.00
## sd(speakerpolite) 4576 1.00
## sd(condexpt) 2932 1.00
## cor(Intercept,speakerpolite) 6000 1.00
## cor(Intercept,condexpt) 6000 1.00
## cor(speakerpolite,condexpt) 6000 1.00
##
## ~subid (Number of levels: 48)
## Estimate Est.Error l-95% CI u-95% CI
## sd(Intercept) 3.95 1.50 1.82 7.57
## sd(speakerpolite) 8.74 3.54 3.80 17.60
## sd(condexpt) 1.35 1.05 0.06 4.00
## cor(Intercept,speakerpolite) -0.24 0.31 -0.76 0.46
## cor(Intercept,condexpt) -0.35 0.48 -0.96 0.76
## cor(speakerpolite,condexpt) -0.14 0.46 -0.90 0.78
## Eff.Sample Rhat
## sd(Intercept) 1639 1.00
## sd(speakerpolite) 1874 1.00
## sd(condexpt) 1519 1.00
## cor(Intercept,speakerpolite) 1461 1.00
## cor(Intercept,condexpt) 6000 1.00
## cor(speakerpolite,condexpt) 4592 1.00
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept -2.21 6.18 -15.73 9.21 1861
## Age 1.20 0.99 -0.40 3.47 1599
## condexpt -2.51 5.19 -12.85 7.79 2714
## speakerpolite 29.66 17.73 1.41 71.01 1319
## Age:condexpt 0.27 0.80 -1.34 1.77 2471
## Age:speakerpolite -7.05 3.13 -14.46 -2.46 1450
## condexpt:speakerpolite -5.17 9.54 -25.40 12.92 2645
## Age:condexpt:speakerpolite 1.05 1.61 -1.91 4.57 2297
## Rhat
## Intercept 1.00
## Age 1.00
## condexpt 1.00
## speakerpolite 1.00
## Age:condexpt 1.00
## Age:speakerpolite 1.00
## condexpt:speakerpolite 1.00
## Age:condexpt:speakerpolite 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).