library(brms)
library(DHARMa)
library(tidyverse)
library(readxl)
library(rmarkdown)
library(tidybayes)
library(GGally)
library(plotrix)
library(ggpubr)
library(ggthemes)
library(sjPlot)
library(tidybayes)
library(patchwork)
library(readODS)
Open prepped files – these come DOD_42mo.RMD, DOD_48mo.RMD and DOD_54mo.RMD. In those files, I removed missing data and created a variable to indicate whether participants produced at least 1 DOD, and also calculated the number of missing trials per participant. I then saved those datasets so I don’t have to re-run those processing steps.
DOD_42mo_prepped <- read_ods("DOD_42mo_prepped.ods") %>%
mutate(COVID.Zoom.session = NA)
DOD_48mo_prepped <- read_ods("DOD_48mo_prepped.ods") %>%
mutate(COVID.Zoom.session = NA)
DOD_54mo_prepped <- read_ods("DOD_54mo_prepped.ods")
Add indicator variable for time, select appropriate variables.
DOD_42mo<- DOD_42mo_prepped %>%
mutate(
Time = -1,
Time_c = "42",
PartSession = paste(Blind_ID, Time, sep="")
) %>%
dplyr::select("Blind_ID", "Time", "Time_c", "PartSession", "List", "Item", "Prime.verb", "Target.verb", "Prime.Type", "Verb.match", "RESPONSE.CODE", "Prime", "Overlap", "bias", "biasmatch", "biasmismatchc", "Trial_Problem", "COVID.Zoom.session")
DOD_48mo<- DOD_48mo_prepped %>%
mutate(
Time = 0,
Time_c = "48",
PartSession = paste(Blind_ID, Time, sep="")
) %>%
dplyr::select("Blind_ID", "Time", "Time_c", "PartSession", "List", "Item", "Prime.verb", "Target.verb", "Prime.Type", "Verb.match", "RESPONSE.CODE", "Prime", "Overlap", "bias", "biasmatch", "biasmismatchc", "Trial_Problem", "COVID.Zoom.session")
DOD_54mo<- DOD_54mo_prepped %>%
mutate(
Time = 1,
Time_c = "54",
PartSession = paste(Blind_ID, Time_c, sep="")
) %>%
dplyr::select("Blind_ID", "Time", "Time_c", "PartSession", "List", "Item", "Prime.verb", "Target.verb", "Prime.Type", "Verb.match", "RESPONSE.CODE", "Prime", "Overlap", "bias", "biasmatch", "biasmismatchc", "Trial_Problem", "COVID.Zoom.session")
Attach DOD Prod variable from first session – to model exclusively trajectory of participants who produced DODs during the first session.
Combined = bind_rows(DOD_42mo, DOD_48mo, DOD_54mo)
Combined <- DOD_42mo_prepped %>%
group_by(Blind_ID) %>%
dplyr::select("Blind_ID", "Prod_DOD") %>%
slice(1) %>%
full_join(Combined, ., by = "Blind_ID")
Combined$Time_sum <- factor(Combined$Time_c, levels=c("54", "48", "42"))
contrasts(Combined$Time_sum) = contr.sum(3)
contrasts(Combined$Time_sum)
## [,1] [,2]
## 54 1 0
## 48 0 1
## 42 -1 -1
Plot effects by time point–to see if priming effect seems to decrease and lexical boost seems to increase by age.
cbPalette <- c("#000000", "#56B4E9", "#E69F00","#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
(p0 <- Combined %>%
group_by(Prime, Overlap, Time_c) %>%
summarise(mean = mean(RESPONSE.CODE, na.rm=TRUE),
se = std.error(RESPONSE.CODE, na.rm=TRUE)) %>%
ggplot(aes(y=mean, x=Overlap, fill=Prime, ymin=mean, ymax=mean+se)) +
geom_bar(stat="identity",position="dodge") +
geom_errorbar(width=.4,
position=position_dodge(.9)) +
facet_wrap(~Time_c) +
ylim(0, .4) +
theme_minimal() +
labs(y="Raw Mean") +
scale_fill_manual(values=cbPalette)
)
## `summarise()` has grouped output by 'Prime', 'Overlap'. You can override using
## the `.groups` argument.
Truncated gaussian with SD of 2 on random effect SDs. Keeps 95% of probability < 4 (roughly). A random effect SD greater than that is a bit wild on the logit scale.
logit_prior = prior(normal(0,2), class=sd)
Model with nested random effects: ID by Time.
mlong0 <- brm(RESPONSE.CODE ~ 1 + Time_sum + (1 | Blind_ID:Time_c) + (1 + Time_sum | Blind_ID) + (1 | Target.verb), control=list(adapt_delta = .90), family="bernoulli", prior=logit_prior, iter=4000, cores=4, data=Combined,seed=12345, file="LONG/output/mlong0.rds")
mlong0 <- add_criterion(mlong0, "loo")
summary(mlong0, prob=.9)
## Family: bernoulli
## Links: mu = logit
## Formula: RESPONSE.CODE ~ 1 + Time_sum + (1 | Blind_ID:Time_c) + (1 + Time_sum | Blind_ID) + (1 | Target.verb)
## Data: Combined (Number of observations: 2865)
## Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
## total post-warmup draws = 8000
##
## Group-Level Effects:
## ~Blind_ID (Number of levels: 100)
## Estimate Est.Error l-90% CI u-90% CI Rhat Bulk_ESS
## sd(Intercept) 0.92 0.25 0.50 1.30 1.01 401
## sd(Time_sum1) 0.36 0.27 0.03 0.90 1.01 451
## sd(Time_sum2) 0.68 0.37 0.09 1.30 1.01 337
## cor(Intercept,Time_sum1) 0.06 0.46 -0.73 0.80 1.00 3021
## cor(Intercept,Time_sum2) 0.14 0.38 -0.52 0.75 1.00 1761
## cor(Time_sum1,Time_sum2) -0.02 0.47 -0.77 0.78 1.00 770
## Tail_ESS
## sd(Intercept) 413
## sd(Time_sum1) 285
## sd(Time_sum2) 405
## cor(Intercept,Time_sum1) 4174
## cor(Intercept,Time_sum2) 2475
## cor(Time_sum1,Time_sum2) 2032
##
## ~Blind_ID:Time_c (Number of levels: 261)
## Estimate Est.Error l-90% CI u-90% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.30 0.29 0.83 1.69 1.01 290 179
##
## ~Target.verb (Number of levels: 6)
## Estimate Est.Error l-90% CI u-90% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.45 0.54 0.82 2.49 1.00 2606 4353
##
## Population-Level Effects:
## Estimate Est.Error l-90% CI u-90% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.90 0.64 -3.88 -1.81 1.00 1898 2848
## Time_sum1 -0.19 0.18 -0.49 0.10 1.00 3788 5792
## Time_sum2 0.03 0.20 -0.29 0.35 1.00 3187 5101
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Random effects by participant, with no additional dependence between trials in a given session.
mlong0b <- brm(RESPONSE.CODE ~ 1 + Time_sum + (1 + Time_sum | Blind_ID) + (1 | Target.verb), control=list(adapt_delta=.9), family="bernoulli", prior=logit_prior, iter=4000, cores=4, data=Combined,seed=12345, file="LONG/output/mlong0b.rds")
mlong0b <- add_criterion(mlong0b, "loo")
summary(mlong0b)
## Family: bernoulli
## Links: mu = logit
## Formula: RESPONSE.CODE ~ 1 + Time_sum + (1 + Time_sum | Blind_ID) + (1 | Target.verb)
## Data: Combined (Number of observations: 2865)
## Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
## total post-warmup draws = 8000
##
## Group-Level Effects:
## ~Blind_ID (Number of levels: 100)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 1.25 0.15 0.98 1.57 1.00 3114
## sd(Time_sum1) 1.05 0.18 0.74 1.42 1.00 3386
## sd(Time_sum2) 1.35 0.20 1.00 1.77 1.00 2634
## cor(Intercept,Time_sum1) 0.05 0.20 -0.33 0.43 1.00 3566
## cor(Intercept,Time_sum2) 0.07 0.18 -0.29 0.41 1.00 2185
## cor(Time_sum1,Time_sum2) -0.33 0.17 -0.62 0.04 1.00 1710
## Tail_ESS
## sd(Intercept) 4964
## sd(Time_sum1) 5557
## sd(Time_sum2) 4966
## cor(Intercept,Time_sum1) 5288
## cor(Intercept,Time_sum2) 3792
## cor(Time_sum1,Time_sum2) 3028
##
## ~Target.verb (Number of levels: 6)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.45 0.53 0.75 2.81 1.00 3771 4924
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.92 0.64 -4.13 -1.54 1.00 2804 4004
## Time_sum1 -0.17 0.19 -0.55 0.19 1.00 5688 5704
## Time_sum2 0.01 0.21 -0.42 0.42 1.00 4635 5225
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#loo1 <- loo(mlong0)
#loo2 <- loo(mlong0b)
#loo3 <- reloo(mlong0b, loo2)
#model_weights(mlong0, mlong0b)
#loo_compare(loo3, loo1)
Model with within session random effects fits numerically better (though less than 1 SE elpd diff = -.4, SE = .9), but given the extra complexity, and the redundancy (slopes for time point are indistinguishable from time-point specific intercepts), I think it’s best to keep the simpler model. Additionally, I’ve run into several convergence issues with the full model.
Just to remember how everything is coded:
contrasts(Combined$Time_sum)
## [,1] [,2]
## 54 1 0
## 48 0 1
## 42 -1 -1
table(Combined$Prime.Type, Combined$Prime)
##
## DOD POD
## -0.5 0 1446
## 0.5 1419 0
table(Combined$Overlap)
##
## No Overlap Overlap
## 1355 1510
A problem with the linear model is that it makes a pretty strict assumption about the trajectory (that it’s equal between 54 and 48 and 48 and 42 months). Here I treat time categorically, using effects coding (-1, 1).
mlong_eff <- brm(RESPONSE.CODE ~ 1 + Time_sum*Prime.Type*Overlap +
(1 + Time_sum*Prime.Type*Overlap | Blind_ID ) +
(1 + Time_sum*Prime.Type*Overlap | Target.verb),
family="bernoulli",
prior=logit_prior,
iter=4000,
cores=4,
seed=12345,
data=Combined,
file="LONG/output/mlong_eff.rds")
mlong_eff <- add_criterion(mlong_eff, "loo")
Trace plots (commented for space)
#plot(mlong_eff, N = 4)
Look at residuals
mlong_eff.model.check <- createDHARMa(
simulatedResponse = t(posterior_predict(mlong_eff)),
observedResponse = Combined$RESPONSE.CODE,
fittedPredictedResponse = apply(t(posterior_epred(mlong_eff)), 1, mean),
integerResponse=TRUE
)
plot(mlong_eff.model.check) # standard plots
testDispersion(mlong_eff.model.check)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.72622, p-value < 2.2e-16
## alternative hypothesis: two.sided
plotResiduals(mlong_eff.model.check, form=Combined$Time_sum)
Again, we have underdispersion and a very weak trend in the residuals vs fitted values.
Get coefficients (only printed fixed effects here for space, see Markdown files for random effects)
summary(mlong_eff, prob=.9)$fixed
## Estimate Est.Error l-90% CI u-90% CI
## Intercept -3.0312660 0.8549478 -4.3746539 -1.5607215
## Time_sum1 -0.2588566 0.3250338 -0.7983414 0.2638931
## Time_sum2 0.2205978 0.2939442 -0.2567553 0.7013722
## Prime.Type 0.6169287 0.3222499 0.1184692 1.1507760
## OverlapOverlap -1.0010990 0.4065189 -1.6457270 -0.3560187
## Time_sum1:Prime.Type 0.3110120 0.4416843 -0.4129051 1.0125459
## Time_sum2:Prime.Type -0.1122013 0.3734383 -0.7151050 0.4973341
## Time_sum1:OverlapOverlap 0.4907467 0.5380950 -0.3187667 1.3899418
## Time_sum2:OverlapOverlap -0.3767525 0.3283955 -0.8896421 0.1531646
## Prime.Type:OverlapOverlap 1.4776504 0.6357077 0.5139654 2.5608167
## Time_sum1:Prime.Type:OverlapOverlap -0.7430864 0.7786027 -2.0139687 0.5137010
## Time_sum2:Prime.Type:OverlapOverlap 0.6076502 0.6294585 -0.3864828 1.6744396
## Rhat Bulk_ESS Tail_ESS
## Intercept 1.002178 1565.885 2575.088
## Time_sum1 1.001019 3584.575 4365.895
## Time_sum2 1.000316 3110.602 4430.052
## Prime.Type 1.001338 4001.858 4124.043
## OverlapOverlap 1.001268 2930.794 3730.949
## Time_sum1:Prime.Type 1.000433 4192.804 4172.082
## Time_sum2:Prime.Type 1.000117 5369.447 5099.234
## Time_sum1:OverlapOverlap 1.002213 3673.954 4272.106
## Time_sum2:OverlapOverlap 1.000692 4590.521 4671.631
## Prime.Type:OverlapOverlap 1.000313 4235.138 4170.005
## Time_sum1:Prime.Type:OverlapOverlap 1.000526 3890.403 4308.942
## Time_sum2:Prime.Type:OverlapOverlap 1.001048 4471.004 4526.723
hypothesis(mlong_eff, "Prime.Type > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (Prime.Type) > 0 0.62 0.32 0.12 1.15 44.98 0.98
## Star
## 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong_eff, "Time_sum1:Prime.Type < 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum1:Prime.... < 0 0.31 0.44 -0.41 1.01 0.29
## Post.Prob Star
## 1 0.22
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong_eff, "Time_sum2:Prime.Type < 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum2:Prime.... < 0 -0.11 0.37 -0.72 0.5 1.66
## Post.Prob Star
## 1 0.62
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong_eff, "Prime.Type:OverlapOverlap > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Prime.Type:Overl... > 0 1.48 0.64 0.51 2.56 102.9
## Post.Prob Star
## 1 0.99 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong_eff, "Time_sum1:Prime.Type:OverlapOverlap > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum1:Prime.... > 0 -0.74 0.78 -2.01 0.51 0.18
## Post.Prob Star
## 1 0.15
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong_eff, "Time_sum2:Prime.Type:OverlapOverlap > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum2:Prime.... > 0 0.61 0.63 -0.39 1.67 5.45
## Post.Prob Star
## 1 0.84
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
Plot, predicted values vs data
p0<- p0 + theme(axis.title.x = element_blank())
p2 <- fitted(mlong_eff, re_formula=NA, scale="response") %>%
cbind(Combined) %>%
dplyr::select(Prime, Overlap, Time_c, Estimate, Est.Error) %>%
ggplot(aes(y=Estimate, x=Overlap, fill=Prime )) +
geom_bar(stat="identity",position="dodge") +
geom_errorbar(aes(ymin=Estimate, ymax=Estimate+Est.Error), width=.2,
position=position_dodge(.9)) +
facet_wrap(~Time_c) +
theme_minimal() +
ylim(0, .4) +
labs(y="Predicted Probability") +
scale_fill_manual(values=cbPalette)
p0/p2 + plot_layout(guides = "collect")
ggsave("plots/Fig5.png", last_plot(), dpi=500)
## Saving 7 x 5 in image
Get the time point specific effects (abstract priming effect, lexical boost and priming effect with lexical overlap)
# First make dummy data set with fixed effects only, to get posterior samples for each crossing of Time x Prime Structure x Overlap
df <- tibble( # Design matrix (minus intercept which is automatic)
Overlap = factor(rep(c(0, 0, 1, 1), 3), labels = c("No Overlap", "Overlap")),
Prime.Type = rep(c(-.5, .5, -.5, .5), 3),
Time_sum = c(rep(54, 4), rep(48, 4), rep(42, 4))
)
# Getting samples on logit scale
ps_means <- posterior_linpred(mlong_eff, newdata=df, re_formula=NA) %>%
t() %>%
as_tibble() %>%
bind_cols(df, .) %>%
pivot_longer(starts_with("V"), names_to="sample", values_to="linpred") %>%
mutate( # some re-coding to make things more readable later
Time_c = ifelse(Time_sum==54, yes="54", no =
ifelse(Time_sum==48, yes="48", no = "42")),
Prime.Type = ifelse(Prime.Type == .5, yes="DOD", no = "POD"),
Overlap = ifelse(Overlap=="Overlap", yes="Ov", no = "No")
) %>%
dplyr::select(-"Time_sum") %>%
pivot_wider(names_from=c("Overlap", "Prime.Type", "Time_c"), values_from="linpred") %>%
mutate( # calculate time-point specific effects
Abstract_42 = No_DOD_42 - No_POD_42,
LexOver_42 = Ov_DOD_42 - Ov_POD_42,
Abstract_48 = No_DOD_48 - No_POD_48,
LexOver_48 = Ov_DOD_48 - Ov_POD_48,
Abstract_54 = No_DOD_54 - No_POD_54,
LexOver_54= Ov_DOD_54 - Ov_POD_54,
LB_42 = LexOver_42 - Abstract_42,
LB_48 = LexOver_48 - Abstract_48,
LB_54 = LexOver_54 - Abstract_54
) %>%
dplyr::select("sample", starts_with(c("Abs", "Lex", "LB"))) %>%
pivot_longer(
starts_with(c("Abs", "Lex", "LB")),
names_to = c("Effect", "Time"),
names_sep = "_",
values_to = "Logits"
) %>%
mutate(
Effect = ifelse(Effect=="Abstract", yes = Effect, no = ifelse(
Effect=="LexOver", yes = "Overlap", no = "Lexical Boost")),
Time = paste(Time, "months"),
Condition = paste(Time, Effect)
)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ps_means_p <- ps_means %>%
group_by(Condition) %>%
summarise(
P = sum(Logits > 0)/8000 # calculate proportion of samples above 0 for each effect
)
p1 <- ps_means %>% # plot priming effects with and without overlap
mutate( # re-level conditions so that 42 months is at the top of the figure.
Condition = factor(Condition, levels = c("54 months Lexical Boost", "54 months Overlap", "54 months Abstract",
"48 months Lexical Boost", "48 months Overlap", "48 months Abstract",
"42 months Lexical Boost", "42 months Overlap", "42 months Abstract")
)) %>%
filter(Effect != "Lexical Boost") %>%
ggplot(aes(y=Condition, x=Logits)) + stat_halfeye(.width=c(.66, .90)) + geom_vline(xintercept = 0) + theme_minimal() +
annotate("text", x=7, y=6.15, label= paste("p =", round(ps_means_p[1, 2], 2))) +
annotate("text", x=7, y=5.15, label= paste("p =", round(ps_means_p[3, 2], 2))) +
annotate("text", x=7, y=4.15, label= paste("p =", round(ps_means_p[4, 2], 2))) +
annotate("text", x=7, y=3.15, label= paste("p =", round(ps_means_p[6, 2], 2))) +
annotate("text", x=7, y=2.15, label= paste("p =", round(ps_means_p[7, 2], 2))) +
annotate("text", x=7, y=1.15, label= paste("p =", round(ps_means_p[9, 2], 2))) +
theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank())
p2 <- ps_means %>% # plot lexical boosts
mutate( # re-level conditions so that 42 months is at the top of the figure.
Condition = factor(Condition, levels = c("54 months Lexical Boost", "54 months Overlap", "54 months Abstract",
"48 months Lexical Boost", "48 months Overlap", "48 months Abstract",
"42 months Lexical Boost", "42 months Overlap", "42 months Abstract")
)) %>%
filter(Effect == "Lexical Boost") %>%
ggplot(aes(y=Condition, x=Logits)) + stat_halfeyeh(.width=c(.66, .90)) + geom_vline(xintercept = 0) + theme_minimal() +
annotate("text", x=7, y=3.15, label= paste("p =", round(ps_means_p[2, 2], 2))) +
annotate("text", x=7, y=2.15, label= paste("p =", round(ps_means_p[5, 2], 2))) +
annotate("text", x=7, y=1.15, label= paste("p =", round(ps_means_p[8, 2], 2))) +
labs(y="Effect") +
theme(axis.title.y=element_blank())
## Warning: 'stat_halfeyeh' is deprecated.
## Use 'stat_halfeye' instead.
## See help("Deprecated") and help("tidybayes-deprecated").
(p1 / p2) + labs(y = "Effect")
ggsave("plots/Fig6.png", last_plot(), dpi=500)
## Saving 7 x 5 in image
I have also run some sensitivity analyses, which were suppressed from the R Markdown file. These tested whether (a) dropping participants who completed the 54 month sesison on Zoom, (b) dropping the mis-administered trial, (c) forcing uncorrelated random effects or (d) dropping trials with high pareto’s K changed the results. Results were qualitatively similar to those above. For space reasons I have suppressed these from the html file, but see R markdown file associated with this project for more information.
Include only participants who produced a DOD at first time point.
This model gave some divergent transitions and low effective sample sizes, so I up-ed the adapt delta and added some more iterations.
Combined2 <- filter(Combined, Prod_DOD > 0)
mlong2_eff <- brm(RESPONSE.CODE ~ 1 + Time_sum*Prime.Type*Overlap +
(1 + Time_sum*Prime.Type*Overlap | Blind_ID) +
(1 + Time_sum*Prime.Type*Overlap | Target.verb),
family="bernoulli",
prior=logit_prior,
iter=5000,
cores=4,
seed=12345,
control=list(adapt_delta=.95),
data=Combined2,
file="LONG/output/mlong2_eff.rds")
mlong2_eff <- add_criterion(mlong2_eff, "loo")
#plot(mlong2_eff, N= 4)
mlong2_eff.model.check <- createDHARMa(
simulatedResponse = t(posterior_predict(mlong2_eff)),
observedResponse = Combined2$RESPONSE.CODE,
fittedPredictedResponse = apply(t(posterior_epred(mlong2_eff)), 1, mean),
integerResponse=TRUE
)
plot(mlong2_eff.model.check) # standard plots
## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details
testDispersion(mlong2_eff.model.check)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.69737, p-value < 2.2e-16
## alternative hypothesis: two.sided
plotResiduals(mlong2_eff.model.check, form=Combined2$Time_sum)
Significant difference in residual variances across time (in particular, more undersdispersion at last time point)
summary(mlong2_eff)$fixed
## Estimate Est.Error l-95% CI u-95% CI
## Intercept -2.32645220 0.8865980 -4.0093346 -0.4661804
## Time_sum1 -0.54971387 0.4097927 -1.3945221 0.2514227
## Time_sum2 -0.40248303 0.3498083 -1.1153815 0.2636639
## Prime.Type 0.60058708 0.4235028 -0.1891931 1.4798969
## OverlapOverlap -1.14608547 0.4856274 -2.1975009 -0.2684393
## Time_sum1:Prime.Type -0.02866171 0.5074409 -1.0549500 0.9552859
## Time_sum2:Prime.Type 0.13921543 0.4862987 -0.7977964 1.0992696
## Time_sum1:OverlapOverlap 0.11120742 0.6589775 -1.1802128 1.4536143
## Time_sum2:OverlapOverlap -0.13708739 0.4972115 -1.1188479 0.8382416
## Prime.Type:OverlapOverlap 2.07772073 0.9192851 0.3587913 4.0255821
## Time_sum1:Prime.Type:OverlapOverlap -0.13108920 0.8362947 -1.8189339 1.4981539
## Time_sum2:Prime.Type:OverlapOverlap 0.06320939 0.8265207 -1.4994080 1.7488969
## Rhat Bulk_ESS Tail_ESS
## Intercept 1.0004938 3007.830 5223.710
## Time_sum1 0.9999707 6487.111 5534.679
## Time_sum2 1.0005988 6004.676 5817.865
## Prime.Type 1.0004425 7504.172 7502.811
## OverlapOverlap 1.0009754 5138.638 6875.435
## Time_sum1:Prime.Type 1.0002606 8222.877 6744.853
## Time_sum2:Prime.Type 1.0004784 8079.467 6860.411
## Time_sum1:OverlapOverlap 1.0003730 5704.203 6016.118
## Time_sum2:OverlapOverlap 1.0001887 7425.760 5664.096
## Prime.Type:OverlapOverlap 1.0001290 6623.562 6533.937
## Time_sum1:Prime.Type:OverlapOverlap 0.9999754 8265.991 7087.933
## Time_sum2:Prime.Type:OverlapOverlap 0.9999875 7074.946 6908.085
hypothesis(mlong2_eff, "Prime.Type > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (Prime.Type) > 0 0.6 0.42 -0.05 1.29 14.65 0.94
## Star
## 1
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong2_eff, "Time_sum1:Prime.Type > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum1:Prime.... > 0 -0.03 0.51 -0.87 0.79 0.9
## Post.Prob Star
## 1 0.47
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong2_eff, "Time_sum2:Prime.Type > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum2:Prime.... > 0 0.14 0.49 -0.64 0.93 1.6
## Post.Prob Star
## 1 0.62
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong2_eff, "Prime.Type:OverlapOverlap > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Prime.Type:Overl... > 0 2.08 0.92 0.66 3.64 88.29
## Post.Prob Star
## 1 0.99 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong2_eff, "Time_sum1:Prime.Type:OverlapOverlap > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum1:Prime.... > 0 -0.13 0.84 -1.51 1.21 0.77
## Post.Prob Star
## 1 0.43
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong2_eff, "Time_sum2:Prime.Type:OverlapOverlap > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum2:Prime.... > 0 0.06 0.83 -1.25 1.43 1.09
## Post.Prob Star
## 1 0.52
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
Check if conditional effects differ.
df <- tibble( # Design matrix (minus intercept which is automatic)
Overlap = factor(rep(c(0, 0, 1, 1), 3), labels = c("No Overlap", "Overlap")),
Prime.Type = rep(c(-.5, .5, -.5, .5), 3),
Time_sum = c(rep(54, 4), rep(48, 4), rep(42, 4))
)
ps_means <- posterior_linpred(mlong2_eff, newdata=df, re_formula=NA) %>%
t() %>% # posterior samples of linear predictor.
as_tibble() %>%
bind_cols(df, .) %>%
pivot_longer(starts_with("V"), names_to="sample", values_to="linpred") %>%
mutate(
Time_c = ifelse(Time_sum==54, yes="54", no =
ifelse(Time_sum==48, yes="48", no = "42")),
Prime.Type = ifelse(Prime.Type == .5, yes="DOD", no = "POD"),
Overlap = ifelse(Overlap=="Overlap", yes="Ov", no = "No")
) %>%
dplyr::select(-"Time_sum") %>%
pivot_wider(names_from=c("Overlap", "Prime.Type", "Time_c"), values_from="linpred") %>%
mutate(
Abstract_42 = No_DOD_42 - No_POD_42,
LexOver_42 = Ov_DOD_42 - Ov_POD_42,
Abstract_48 = No_DOD_48 - No_POD_48,
LexOver_48 = Ov_DOD_48 - Ov_POD_48,
Abstract_54 = No_DOD_54 - No_POD_54,
LexOver_54= Ov_DOD_54- Ov_POD_54,
LB_42 = LexOver_42 - Abstract_42,
LB_48 = LexOver_48 - Abstract_48,
LB_54 = LexOver_54 - Abstract_54
) %>%
dplyr::select("sample", starts_with(c("Abs", "Lex", "LB"))) %>%
pivot_longer(
starts_with(c("Abs", "Lex", "LB")),
names_to = c("Effect", "Time"),
names_sep = "_",
values_to = "Logits"
) %>%
mutate(
Effect = ifelse(Effect=="Abstract", yes = Effect, no = ifelse(
Effect=="LexOver", yes = "Overlap", no = "Lexical Boost")),
Time = paste(Time, "months"),
Condition = paste(Time, Effect)
)
ps_means_p <- ps_means %>%
group_by(Condition) %>%
summarise(
P = sum(Logits > 0)/10000 # increased denominator here because more iterations in intial model.
)
p1 <- ps_means %>% # plot priming effects with and without overlap
mutate( # re-level conditions so that 42 months is at the top of the figure.
Condition = factor(Condition, levels = c("54 months Lexical Boost", "54 months Overlap", "54 months Abstract",
"48 months Lexical Boost", "48 months Overlap", "48 months Abstract",
"42 months Lexical Boost", "42 months Overlap", "42 months Abstract")
)) %>%
filter(Effect != "Lexical Boost") %>%
ggplot(aes(y=Condition, x=Logits)) + stat_halfeye(.width=c(.66, .90)) + geom_vline(xintercept = 0) + theme_minimal() +
annotate("text", x=7, y=6.15, label= paste("p =", round(ps_means_p[1, 2], 2))) +
annotate("text", x=7, y=5.15, label= paste("p =", round(ps_means_p[3, 2], 2))) +
annotate("text", x=7, y=4.15, label= paste("p =", round(ps_means_p[4, 2], 2))) +
annotate("text", x=7, y=3.15, label= paste("p =", round(ps_means_p[6, 2], 2))) +
annotate("text", x=7, y=2.15, label= paste("p =", round(ps_means_p[7, 2], 2))) +
annotate("text", x=7, y=1.15, label= paste("p =", round(ps_means_p[9, 2], 2))) +
theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank())
p2 <- ps_means %>% # plot lexical boosts
mutate( # re-level conditions so that 42 months is at the top of the figure.
Condition = factor(Condition, levels = c("54 months Lexical Boost", "54 months Overlap", "54 months Abstract",
"48 months Lexical Boost", "48 months Overlap", "48 months Abstract",
"42 months Lexical Boost", "42 months Overlap", "42 months Abstract")
)) %>%
filter(Effect == "Lexical Boost") %>%
ggplot(aes(y=Condition, x=Logits)) + stat_halfeyeh(.width=c(.66, .90)) + geom_vline(xintercept = 0) + theme_minimal() +
annotate("text", x=7, y=3.15, label= paste("p =", round(ps_means_p[2, 2], 2))) +
annotate("text", x=7, y=2.15, label= paste("p =", round(ps_means_p[5, 2], 2))) +
annotate("text", x=7, y=1.15, label= paste("p =", round(ps_means_p[8, 2], 2))) +
labs(y="Effect") +
theme(axis.title.y=element_blank())
## Warning: 'stat_halfeyeh' is deprecated.
## Use 'stat_halfeye' instead.
## See help("Deprecated") and help("tidybayes-deprecated").
(p1 / p2) + labs(y = "Effect")
ggsave("plots/Fig7.png", last_plot(), dpi=500)
## Saving 7 x 5 in image
I’ve conducted the same sensitivity analyses as before, but excluded them from the html file.
# Drop participants who completed 54 month session on zoom
mlong2_effa <- Combined2 %>%
filter(is.na(COVID.Zoom.session)) %>%
brm(RESPONSE.CODE ~ 1 + Time_sum*Prime.Type*Overlap +
(1 + Time_sum*Prime.Type*Overlap| Blind_ID ) +
(1 + Time_sum*Prime.Type*Overlap | Target.verb),
family="bernoulli",
iter=2000,
cores=4,
prior=logit_prior,
seed=12345,
data=.,
file="LONG/output/mlong2_effa.rds")
hypothesis(mlong2_effa, "Prime.Type > 0")
hypothesis(mlong2_effa, "Time_sum1:Prime.Type < 0")
hypothesis(mlong2_effa, "Time_sum2:Prime.Type < 0")
hypothesis(mlong2_effa, "Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effa, "Time_sum1:Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effa, "Time_sum2:Prime.Type:OverlapOverlap > 0")
# Drop problematic trial
mlong2_effb <- Combined2 %>%
filter(Trial_Problem == 0) %>%
brm(RESPONSE.CODE ~ 1 + Time_sum*Prime.Type*Overlap +
(1 + Time_sum*Prime.Type*Overlap| Blind_ID ) +
(1 + Time_sum*Prime.Type*Overlap | Target.verb),
family="bernoulli",
iter=2000,
cores=4,
prior=logit_prior,
seed=12345,
data=.,
file="LONG/output/mlong2_effb.rds")
hypothesis(mlong2_effb, "Prime.Type > 0")
hypothesis(mlong2_effb, "Time_sum1:Prime.Type < 0")
hypothesis(mlong2_effb, "Time_sum2:Prime.Type < 0")
hypothesis(mlong2_effb, "Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effb, "Time_sum1:Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effb, "Time_sum2:Prime.Type:OverlapOverlap > 0")
# uncorrelated random effects
mlong2_effc <- brm(RESPONSE.CODE ~ 1 + Time_sum*Prime.Type*Overlap +
(1 + Time_sum*Prime.Type*Overlap|| Blind_ID ) +
(1 + Time_sum*Prime.Type*Overlap || Target.verb),
family="bernoulli",
prior=logit_prior,
iter=2000,
cores=4,
seed=12345,
data=Combined2,
file="LONG/output/mlong2_effc.rds")
hypothesis(mlong2_effc, "Prime.Type > 0")
hypothesis(mlong2_effc, "Time_sum1:Prime.Type < 0")
hypothesis(mlong2_effc, "Time_sum2:Prime.Type < 0")
hypothesis(mlong2_effc, "Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effc, "Time_sum1:Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effc, "Time_sum2:Prime.Type:OverlapOverlap > 0")
# drop high pareto's k.
mlong2_effd <- Combined2 %>%
mutate(
k = mlong2_eff$criteria$loo$diagnostics$pareto_k
) %>%
filter(k < .7) %>%
brm(RESPONSE.CODE ~ 1 + Time_sum*Prime.Type*Overlap +
(1 + Time_sum*Prime.Type*Overlap| Blind_ID ) +
(1 + Time_sum*Prime.Type*Overlap | Target.verb),
family="bernoulli",
prior=logit_prior,
iter=2000,
cores=4,
seed=12345,
data=.,
file="LONG/output/mlong_effd.rds")
hypothesis(mlong2_effd, "Prime.Type > 0")
hypothesis(mlong2_effd, "Time_sum1:Prime.Type < 0")
hypothesis(mlong2_effd, "Time_sum2:Prime.Type < 0")
hypothesis(mlong2_effd, "Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effd, "Time_sum1:Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effd, "Time_sum2:Prime.Type:OverlapOverlap > 0")
mlong5_eff <- brm(RESPONSE.CODE ~ 1 + Time_sum*+Prime.Type*biasmatch +
(1 + Time_sum*Prime.Type*biasmatch | Blind_ID ) +
(1 + Time_sum*Prime.Type*biasmatch | Target.verb),
family="bernoulli",
prior=logit_prior,
iter=4000,
seed=12345,
cores=4,
data=Combined %>% filter(Overlap=="No Overlap"),
file="LONG/output/mlong5_eff.rds")
mlong5_eff <- add_criterion(mlong5_eff, "loo")
Some of the chains on random effects don’t look well mixed (though R Hats are all <= 1.01). I will try simplifying the random effects structure as well.
#plot(mlong5_eff, N=4)
mlong5_eff.model.check <- createDHARMa(
simulatedResponse = t(posterior_predict(mlong5_eff)),
observedResponse = filter(Combined, Overlap=="No Overlap")$RESPONSE.CODE,
fittedPredictedResponse = apply(t(posterior_epred(mlong5_eff)), 1, mean),
integerResponse=TRUE
)
plot(mlong5_eff.model.check) # standard plots
testDispersion(mlong5_eff.model.check)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.59289, p-value < 2.2e-16
## alternative hypothesis: two.sided
plotResiduals(mlong5_eff.model.check, form=filter(Combined, Overlap=="No Overlap")$Time_sum)
Stronger pattern of heteroskedasticity (very large residuals at the high predicted values.)
summary(mlong5_eff)$fixed
## Estimate Est.Error l-95% CI
## Intercept -2.89006787 0.9291285 -4.6978773
## Time_sum1 -0.27433667 0.4024637 -1.0459088
## Time_sum2 0.17029651 0.3796431 -0.5669265
## Prime.Type 0.76053534 0.5863900 -0.4032202
## biasmatchmismatch -1.12795385 0.5332996 -2.2571572
## Time_sum1:Prime.Type 0.16927541 0.6440303 -1.1258569
## Time_sum2:Prime.Type -0.40591784 0.6014875 -1.6270601
## Time_sum1:biasmatchmismatch 0.21305375 0.5986301 -0.9640565
## Time_sum2:biasmatchmismatch -0.04044774 0.6056338 -1.2683294
## Prime.Type:biasmatchmismatch 0.47346771 0.9311701 -1.2731436
## Time_sum1:Prime.Type:biasmatchmismatch 0.31074103 1.0176411 -1.7283992
## Time_sum2:Prime.Type:biasmatchmismatch 0.59458758 0.9586195 -1.3098092
## u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.9639915 1.000903 2313.193 3835.871
## Time_sum1 0.5414602 1.000443 4966.715 5024.343
## Time_sum2 0.9302729 1.000437 4674.957 5094.940
## Prime.Type 1.9755630 1.000294 4624.824 3804.005
## biasmatchmismatch -0.1492602 1.001312 3568.747 4865.744
## Time_sum1:Prime.Type 1.4299039 1.000202 5365.598 5384.595
## Time_sum2:Prime.Type 0.7518175 1.000551 5671.926 5318.146
## Time_sum1:biasmatchmismatch 1.4098920 1.000180 4498.873 5032.982
## Time_sum2:biasmatchmismatch 1.1347019 1.000038 4646.067 5880.978
## Prime.Type:biasmatchmismatch 2.4753917 1.000511 4734.724 3754.676
## Time_sum1:Prime.Type:biasmatchmismatch 2.3428495 1.000791 5353.698 5694.695
## Time_sum2:Prime.Type:biasmatchmismatch 2.5108849 1.000553 5215.156 5170.872
hypothesis(mlong5_eff, "Prime.Type > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (Prime.Type) > 0 0.76 0.59 -0.18 1.72 11.44 0.92
## Star
## 1
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong5_eff, "Prime.Type:biasmatchmismatch > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Prime.Type:biasm... > 0 0.47 0.93 -0.93 2.05 2.29
## Post.Prob Star
## 1 0.7
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong5_eff, "Time_sum1:Prime.Type:biasmatchmismatch < 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum1:Prime.... < 0 0.31 1.02 -1.38 1.96 0.59
## Post.Prob Star
## 1 0.37
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong5_eff, "Time_sum2:Prime.Type:biasmatchmismatch < 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum2:Prime.... < 0 0.59 0.96 -0.96 2.18 0.36
## Post.Prob Star
## 1 0.26
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
Get Empirical Means and Predicted Values
(p2 <- Combined %>%
filter(Overlap=="No Overlap") %>%
group_by(Prime, biasmatch, Time_c) %>%
summarise(mean = mean(RESPONSE.CODE, na.rm=TRUE),
se = std.error(RESPONSE.CODE, na.rm=TRUE)) %>%
ggplot(aes(y=mean, x=biasmatch, fill=Prime)) +
geom_bar(stat="identity",position="dodge") +
geom_errorbar(aes(ymin=mean, ymax=mean+se), width=.2,
position=position_dodge(.9)) +
facet_wrap(~Time_c) +
ylim(0, .4) +
theme_minimal() +
labs(y="Raw Mean") +
scale_fill_manual(values=cbPalette) +
theme(axis.title.x = element_blank())
)
## `summarise()` has grouped output by 'Prime', 'biasmatch'. You can override
## using the `.groups` argument.
p5 <- fitted(mlong5_eff, re_formula=NA, scale="response") %>%
cbind(Combined %>% filter(Overlap=="No Overlap")) %>%
dplyr::select(Prime, biasmatch, Time_c, Estimate, Est.Error) %>%
ggplot(aes(y=Estimate, x=biasmatch, fill=Prime)) +
geom_bar(stat="identity",position="dodge") +
geom_errorbar(aes(ymin=Estimate, ymax=Estimate+Est.Error), width=.2,
position=position_dodge(.9)) +
facet_wrap(~Time_c) +
theme_minimal() +
ylim(0, .4) +
labs(y="Predicted Probability") +
scale_fill_manual(values=cbPalette) +
theme(strip.text = element_blank())
p2/p5 + plot_layout(guides = "collect")
ggsave("plots/Fig8.png", last_plot(), dpi=500)
## Saving 7 x 5 in image
df <- tibble( # Design matrix (minus intercept which is automatic)
biasmatch = factor(rep(c(0, 0, 1, 1), 3), labels = c("match", "mismatch")),
Prime.Type = rep(c(-.5, .5, -.5, .5), 3),
Time_sum = c(rep(54, 4), rep(48, 4), rep(42, 4))
)
ps_means <- posterior_linpred(mlong5_eff, newdata=df, re_formula=NA) %>%
t() %>% # posterior samples of linear predictor.
as_tibble() %>%
bind_cols(df, .) %>%
pivot_longer(starts_with("V"), names_to="sample", values_to="linpred") %>%
mutate(
Time_c = ifelse(Time_sum==54, yes="54", no =
ifelse(Time_sum==48, yes="48", no = "42")),
Prime.Type = ifelse(Prime.Type == .5, yes="DOD", no = "POD"),
biasmatch = ifelse(biasmatch=="mismatch", yes="mm", no = "m")
) %>%
dplyr::select(-"Time_sum") %>%
pivot_wider(names_from=c("biasmatch", "Prime.Type", "Time_c"), values_from="linpred") %>%
mutate(
Match_42 = m_DOD_42 - m_POD_42,
Mismatch_42 = mm_DOD_42 - mm_POD_42,
Match_48 = m_DOD_48 - m_POD_48,
Mismatch_48 = mm_DOD_48 - mm_POD_48,
Match_54 = m_DOD_54 - m_POD_54,
Mismatch_54= mm_DOD_54- mm_POD_54,
Surprisal_42 = Mismatch_42 - Match_42,
Surprisal_48 = Mismatch_48 - Match_48,
Surprisal_54 = Mismatch_54 - Match_54
) %>%
dplyr::select("sample", starts_with(c("Match", "Mismatch", "Surprisal"))) %>%
pivot_longer(
starts_with(c("Match", "Mismatch", "Surprisal")),
names_to = c("Effect", "Time"),
names_sep = "_",
values_to = "Logits"
) %>%
mutate(
Time = paste(Time, "months"),
Condition = paste(Time, Effect)
)
ps_means_p <- ps_means %>%
group_by(Condition) %>%
summarise(
P = sum(Logits > 0)/8000
)
p1 <- ps_means %>%
mutate( # re-level conditions so that 42 months is at the top of the figure.
Condition = factor(Condition, levels = c("54 months Surprisal", "54 months Mismatch", "54 months Match",
"48 months Surprisal", "48 months Mismatch", "48 months Match",
"42 months Surprisal", "42 months Mismatch", "42 months Match")
)) %>%
filter(Effect != "Surprisal") %>%
ggplot(aes(y=Condition, x=Logits)) + stat_halfeye(.width=c(.66, .90)) + geom_vline(xintercept = 0) + theme_minimal() +
annotate("text", x=7, y=6.15, label= paste("p =", round(ps_means_p[1, 2], 2))) +
annotate("text", x=7, y=5.15, label= paste("p =", round(ps_means_p[2, 2], 2))) +
annotate("text", x=7, y=4.15, label= paste("p =", round(ps_means_p[4, 2], 2))) +
annotate("text", x=7, y=3.15, label= paste("p =", round(ps_means_p[5, 2], 2))) +
annotate("text", x=7, y=2.15, label= paste("p =", round(ps_means_p[7, 2], 2))) +
annotate("text", x=7, y=1.15, label= paste("p =", round(ps_means_p[8, 2], 2))) +
theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank())
p2 <- ps_means %>%
mutate( # re-level conditions so that 42 months is at the top of the figure.
Condition = factor(Condition, levels = c("54 months Surprisal", "54 months Mismatch", "54 months Match",
"48 months Surprisal", "48 months Mismatch", "48 months Match",
"42 months Surprisal", "42 months Mismatch", "42 months Match")
)) %>%
filter(Effect == "Surprisal") %>%
ggplot(aes(y=Condition, x=Logits)) + stat_halfeye(.width=c(.66, .90)) + geom_vline(xintercept = 0) + theme_minimal() +
annotate("text", x=7, y=3.15, label= paste("p =", round(ps_means_p[3, 2], 2))) +
annotate("text", x=7, y=2.15, label= paste("p =", round(ps_means_p[6, 2], 2))) +
annotate("text", x=7, y=1.15, label= paste("p =", round(ps_means_p[9, 2], 2))) +
labs(y="Effect") +
theme(axis.title.y=element_blank())
(p1 / p2) + labs(y = "Effect")
ggsave("plots/Fig9.png", last_plot(), dpi=500)
## Saving 7 x 5 in image
mlong6_eff <- brm(RESPONSE.CODE ~ 1 + Time_sum*+Prime.Type*biasmatch +
(1 + Time_sum*Prime.Type*biasmatch | Blind_ID ) +
(1 + Time_sum*Prime.Type*biasmatch | Target.verb),
family="bernoulli",
prior=logit_prior,
iter=4000,
cores=4,
seed=12345,
data=Combined2 %>% filter(Overlap=="No Overlap"),
file="LONG/output/mlong6_eff.rds")
mlong6_eff <- add_criterion(mlong6_eff, "loo")
#plot(mlong6_eff, N=4)
summary(mlong6_eff)$fixed
## Estimate Est.Error l-95% CI
## Intercept -2.04198662 0.9607526 -3.9211299
## Time_sum1 -0.56895570 0.5159499 -1.5944660
## Time_sum2 -0.37584735 0.4960739 -1.4155226
## Prime.Type 0.85514397 0.6426347 -0.3921886
## biasmatchmismatch -1.16440316 0.5874601 -2.3894977
## Time_sum1:Prime.Type -0.17634321 0.7539536 -1.6828224
## Time_sum2:Prime.Type -0.40251753 0.7297878 -1.8989986
## Time_sum1:biasmatchmismatch 0.06573790 0.6919852 -1.3045572
## Time_sum2:biasmatchmismatch -0.19903300 0.7791245 -1.8935938
## Prime.Type:biasmatchmismatch 0.28351026 1.1024708 -1.7758178
## Time_sum1:Prime.Type:biasmatchmismatch 0.09232731 1.1876217 -2.1999529
## Time_sum2:Prime.Type:biasmatchmismatch 1.06777574 1.2045903 -1.2227177
## u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.08826107 1.0032450 2133.332 3581.345
## Time_sum1 0.44332460 1.0010475 3894.898 4482.010
## Time_sum2 0.56384912 1.0006848 3755.285 4152.618
## Prime.Type 2.13830049 1.0002501 5123.351 5021.889
## biasmatchmismatch -0.05450468 1.0003625 4112.826 5478.828
## Time_sum1:Prime.Type 1.33269773 1.0017485 4781.497 5196.944
## Time_sum2:Prime.Type 0.97987603 1.0002816 5291.555 5632.706
## Time_sum1:biasmatchmismatch 1.40673645 0.9997446 3986.095 4945.762
## Time_sum2:biasmatchmismatch 1.22678031 1.0007165 3514.734 4410.186
## Prime.Type:biasmatchmismatch 2.60669211 1.0008242 5341.215 4987.847
## Time_sum1:Prime.Type:biasmatchmismatch 2.42617355 1.0002407 5154.651 5255.388
## Time_sum2:Prime.Type:biasmatchmismatch 3.43906197 1.0010375 4830.974 5145.186
mlong6_eff.model.check <- createDHARMa(
simulatedResponse = t(posterior_predict(mlong6_eff)),
observedResponse = filter(Combined2, Overlap=="No Overlap")$RESPONSE.CODE,
fittedPredictedResponse = apply(t(posterior_epred(mlong6_eff)), 1, mean),
integerResponse=TRUE
)
plot(mlong6_eff.model.check) # standard plots
testDispersion(mlong6_eff.model.check)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.64014, p-value < 2.2e-16
## alternative hypothesis: two.sided
plotResiduals(mlong6_eff.model.check, form=filter(Combined2, Overlap=="No Overlap")$Time_sum)
Stronger pattern of heteroskedasticity (very large residuals at the high predicted values.)
hypothesis(mlong6_eff, "Prime.Type > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (Prime.Type) > 0 0.86 0.64 -0.17 1.87 11.84 0.92
## Star
## 1
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong6_eff, "Prime.Type:biasmatchmismatch > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Prime.Type:biasm... > 0 0.28 1.1 -1.42 2.16 1.48
## Post.Prob Star
## 1 0.6
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong6_eff, "Time_sum1:Prime.Type:biasmatchmismatch < 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum1:Prime.... < 0 0.09 1.19 -1.82 2.03 0.87
## Post.Prob Star
## 1 0.46
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(mlong6_eff, "Time_sum2:Prime.Type:biasmatchmismatch < 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum2:Prime.... < 0 1.07 1.2 -0.84 3.03 0.22
## Post.Prob Star
## 1 0.18
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
tab_model(mlong_eff, mlong2_eff, mlong5_eff, mlong6_eff, show.ci=.90, show.re.var=FALSE, show.r2=FALSE, show.icc=FALSE, transform=NULL, collapse.ci=TRUE, show.p=TRUE, ci.hyphen = " : ", dv.labels=c("Model 1", "Model 2", "Model 3", "Model 4"), string.stat="", file="Table_Long", bpe="mean",
pred.labels=c(
"Intercept",
"54 months",
"48 months",
"Prime (DOD)",
"Overlap",
"54 months * Prime",
"48 months * Prime",
"54 months * Overlap",
"48 months * Overlap",
"Prime * Overlap",
"54 months * Prime * Overlap",
"48 months * Prime * Overlap",
"Bias Mismatch",
"54 months * Bias Mismatch ",
"48 months * Bias Mismatch ",
"Prime * Bias Mismatch",
"54 months* Prime * Bias Mismatch",
"48 months* Prime * Bias Mismatch"
))
| Model 1 | Model 2 | Model 3 | Model 4 | |
|---|---|---|---|---|
| Predictors | Log-Odds | Log-Odds | Log-Odds | Log-Odds |
| Intercept |
-3.03 (-4.37 : -1.56) |
-2.33 (-3.69 : -0.86) |
-2.89 (-4.33 : -1.34) |
-2.04 (-3.59 : -0.45) |
| 54 months |
-0.26 (-0.80 : 0.26) |
-0.55 (-1.21 : 0.09) |
-0.27 (-0.91 : 0.40) |
-0.57 (-1.41 : 0.24) |
| 48 months |
0.22 (-0.26 : 0.70) |
-0.40 (-0.98 : 0.14) |
0.17 (-0.45 : 0.79) |
-0.38 (-1.19 : 0.41) |
| Prime (DOD) |
0.62 (0.12 : 1.15) |
0.60 (-0.05 : 1.29) |
0.76 (-0.18 : 1.72) |
0.86 (-0.17 : 1.87) |
| Overlap |
-1.00 (-1.65 : -0.36) |
-1.15 (-1.97 : -0.43) |
||
| 54 months * Prime |
0.31 (-0.41 : 1.01) |
-0.03 (-0.87 : 0.79) |
0.17 (-0.89 : 1.19) |
-0.18 (-1.43 : 1.04) |
| 48 months * Prime |
-0.11 (-0.72 : 0.50) |
0.14 (-0.64 : 0.93) |
-0.41 (-1.38 : 0.55) |
-0.40 (-1.63 : 0.78) |
| 54 months * Overlap |
0.49 (-0.32 : 1.39) |
0.11 (-0.93 : 1.20) |
||
| 48 months * Overlap |
-0.38 (-0.89 : 0.15) |
-0.14 (-0.94 : 0.64) |
||
| Prime * Overlap |
1.48 (0.51 : 2.56) |
2.08 (0.66 : 3.64) |
||
| 54 months * Prime * Overlap |
-0.74 (-2.01 : 0.51) |
-0.13 (-1.51 : 1.21) |
||
| 48 months * Prime * Overlap |
0.61 (-0.39 : 1.67) |
0.06 (-1.25 : 1.43) |
||
| Bias Mismatch |
-1.13 (-2.04 : -0.33) |
-1.16 (-2.16 : -0.26) |
||
| 54 months * Bias Mismatch |
0.21 (-0.75 : 1.19) |
0.07 (-1.06 : 1.17) |
||
| 48 months * Bias Mismatch |
-0.04 (-1.04 : 0.94) |
-0.20 (-1.52 : 1.01) |
||
| Prime * Bias Mismatch |
0.47 (-0.93 : 2.05) |
0.28 (-1.42 : 2.16) |
||
| 54 months* Prime * Bias Mismatch |
0.31 (-1.38 : 1.96) |
0.09 (-1.82 : 2.03) |
||
| 48 months* Prime * Bias Mismatch |
0.59 (-0.96 : 2.18) |
1.07 (-0.84 : 3.03) |
||
| N | 100 Blind_ID | 46 Blind_ID | 100 Blind_ID | 46 Blind_ID |
| 6 Target.verb | 6 Target.verb | 6 Target.verb | 6 Target.verb | |
| Observations | 2865 | 1479 | 1355 | 696 |
mlong7 <-brm(RESPONSE.CODE ~ 1 + Time_sum +
(1 + Time_sum | Blind_ID) +
(1 +Time_sum | Target.verb),
prior=logit_prior,
iter=4000,
family="bernoulli",
cores=4,
control=list(adapt_delta=.9),
seed=12345,
data=Combined,
file="LONG/output/mlong7.rds")
summary(mlong7)$fixed
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept -2.92419075 0.6766662 -4.2067913 -1.5154476 1.000493 2001.908
## Time_sum1 -0.12122086 0.2450626 -0.5971200 0.3617361 1.000645 3955.599
## Time_sum2 0.04747095 0.2394531 -0.4360217 0.5042627 1.000552 3294.096
## Tail_ESS
## Intercept 2765.204
## Time_sum1 4456.720
## Time_sum2 4418.082
Time_c <- c("42","48", "54")
verbs <- c("brought", "gave", "passed", "sent", "showed", "threw")
Predictors <- crossing(verbs, Time_c)
Time_sum <- factor(Time_c, levels=c("54", "48", "42"))
Time_sum1 <- c(-1, 0, 1) # backwards from original models
Time_sum2 <- c(-1, 1, 0) # backwards from original models
contrasts(Time_sum) = contr.sum(3)
#posterior <- posterior_samples(mlong7) %>%
# dplyr::select("b_Intercept", starts_with("r_Target.verb"), starts_with("b_Time_sum")) %>%
# dplyr::rename("b_Time.sum1" = "b_Time_sum1", "b_Time.sum2" = "b_Time_sum2", "brought_Intercept" = "r_Target.verb[brought,Intercept]", "gave_Intercept" = "r_Target.verb[gave,Intercept]", "passed_Intercept" = "r_Target.verb[passed,Intercept]", "sent_Intercept" = "r_Target.verb[sent,Intercept]", "showed_Intercept" = "r_Target.verb[showed,Intercept]", "threw_Intercept" = "r_Target.verb[threw,Intercept]","brought_Time.sum1" = "r_Target.verb[brought,Time_sum1]", "gave_Time.sum1" = "r_Target.verb[gave,Time_sum1]", "passed_Time.sum1" = "r_Target.verb[passed,Time_sum1]", "sent_Time.sum1" = "r_Target.verb[sent,Time_sum1]", "showed_Time.sum1" = "r_Target.verb[showed,Time_sum1]", "threw_Time.sum1" = "r_Target.verb[threw,Time_sum1]" ,"brought_Time.sum2" = "r_Target.verb[brought,Time_sum2]", "gave_Time.sum2" = "r_Target.verb[gave,Time_sum2]", "passed_Time.sum2" = "r_Target.verb[passed,Time_sum2]", "sent_Time.sum2" = "r_Target.verb[sent,Time_sum2]", "showed_Time.sum2" = "r_Target.verb[showed,Time_sum2]", "threw_Time.sum2" = "r_Target.verb[threw,Time_sum2]") %>%
# mutate(
# sample = paste("Samp", row_number(), sep="_")
# ) %>%
# relocate(sample) %>%
# pivot_longer(-"sample", names_to = "condition", values_to="value") %>%
# separate(condition, c("condition", "effect"), "_")
#uni <- unique(posterior$sample)
#newdata <- NULL
#for(i in 1:8000){
# pos <- filter(posterior, sample == uni[i])
# df<- tibble(
# Fixed = filter(pos, condition=="b" & effect=="Intercept")$value +
# filter(pos, condition=="b" & effect=="Time.sum1")$value * Time_sum1 +
# filter(pos, condition=="b" & effect=="Time.sum2")$value * Time_sum2,
# Brought = Fixed +
# filter(pos, condition=="brought" & effect=="Intercept")$value +
# filter(pos, condition=="brought" & effect=="Time.sum1")$value * Time_sum1 +
# filter(pos, condition=="brought" & effect=="Time.sum2")$value * Time_sum2,
# Gave = Fixed +
# filter(pos, condition=="gave" & effect=="Intercept")$value +
# filter(pos, condition=="gave" & effect=="Time.sum1")$value * Time_sum1 +
# filter(pos, condition=="gave" & effect=="Time.sum2")$value * Time_sum2,
# Passed = Fixed +
# filter(pos, condition=="passed" & effect=="Intercept")$value +
# filter(pos, condition=="passed" & effect=="Time.sum1")$value * Time_sum1 +
# filter(pos, condition=="passed" & effect=="Time.sum2")$value * Time_sum2,
# Sent = Fixed +
# filter(pos, condition=="sent" & effect=="Intercept")$value +
# filter(pos, condition=="sent" & effect=="Time.sum1")$value * Time_sum1 +
# filter(pos, condition=="sent" & effect=="Time.sum2")$value * Time_sum2,
# Showed = Fixed +
# filter(pos, condition=="showed" & effect=="Intercept")$value +
# filter(pos, condition=="showed" & effect=="Time.sum1")$value * Time_sum1 +
# filter(pos, condition=="showed" & effect=="Time.sum2")$value * Time_sum2,
# Threw = Fixed +
# filter(pos, condition=="threw" & effect=="Intercept")$value +
# filter(pos, condition=="threw" & effect=="Time.sum1")$value * Time_sum1 +
# filter(pos, condition=="threw" & effect=="Time.sum2")$value * Time_sum2,
# )
# out <- tibble(X = c(df$Brought, df$Gave, df$Passed, df$Sent, df$Showed, df$Threw))
# names(out)[1] <- pos$sample[1]
# newdata = tibble(newdata, out)
#}
#saveRDS(newdata, "plots/by_word.rds")
newdata <- readRDS("plots/by_word.rds")
data <- bind_cols(Predictors, newdata)
df <- Combined %>%
group_by(Time_c, Blind_ID, Target.verb) %>%
summarise(
prop = mean(RESPONSE.CODE)
) %>%
ungroup()
## `summarise()` has grouped output by 'Time_c', 'Blind_ID'. You can override
## using the `.groups` argument.
df_summed <- df %>%
group_by(Time_c, Target.verb) %>%
summarize(
mean_prop = mean(prop)
) %>%
ungroup()
## `summarise()` has grouped output by 'Time_c'. You can override using the
## `.groups` argument.
data <- data %>%
pivot_longer(-c("Time_c", "verbs"), names_to = "sample", values_to="value") %>%
mutate(
prob = inv_logit_scaled(value)
)
summ <- data %>%
select("verbs", "Time_c", "prob") %>%
group_by(verbs, Time_c) %>%
median_qi(prob, .width=.90)
library(ggpirate)
ggplot() +
geom_point(data=summ, aes(x=verbs, y=prob, color=verbs, fill=verbs), size=4) +
geom_errorbar(data=summ, aes(x=verbs, ymin=.lower, ymax=.upper, color=verbs, fill=verbs), size=2) +
facet_wrap(~Time_c) +
geom_hline(yintercept=.5, linetype="dotted") +
theme_minimal() +
theme(axis.text.x = element_blank())+
theme(legend.title=element_blank()) +
scale_fill_colorblind() +
scale_colour_colorblind() +
ylab("Proportion DODs Produced") +
xlab("Target Verb")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in geom_errorbar(data = summ, aes(x = verbs, ymin = .lower, ymax =
## .upper, : Ignoring unknown aesthetics: fill