library(brms)
library(DHARMa)
library(tidyverse)
library(readxl)
library(rmarkdown)
library(tidybayes)
library(GGally)
library(plotrix)
library(ggpubr)
library(ggthemes)
library(sjPlot)
library(tidybayes)
library(patchwork)
library(readODS)

1 Data Preparation

Open prepped files – these come DOD_42mo.RMD, DOD_48mo.RMD and DOD_54mo.RMD. In those files, I removed missing data and created a variable to indicate whether participants produced at least 1 DOD, and also calculated the number of missing trials per participant. I then saved those datasets so I don’t have to re-run those processing steps.

DOD_42mo_prepped <- read_ods("DOD_42mo_prepped.ods") %>%
  mutate(COVID.Zoom.session = NA) 
DOD_48mo_prepped <- read_ods("DOD_48mo_prepped.ods") %>%
  mutate(COVID.Zoom.session = NA) 
DOD_54mo_prepped <- read_ods("DOD_54mo_prepped.ods")

Add indicator variable for time, select appropriate variables.

DOD_42mo<- DOD_42mo_prepped %>%
  mutate(
    Time = -1,
    Time_c = "42",
    PartSession = paste(Blind_ID, Time, sep="")
  ) %>%
  dplyr::select("Blind_ID", "Time", "Time_c", "PartSession", "List", "Item", "Prime.verb", "Target.verb", "Prime.Type", "Verb.match", "RESPONSE.CODE", "Prime", "Overlap", "bias", "biasmatch", "biasmismatchc", "Trial_Problem", "COVID.Zoom.session")

DOD_48mo<- DOD_48mo_prepped %>%
  mutate(
    Time = 0,
    Time_c = "48",
    PartSession = paste(Blind_ID, Time, sep="")
  ) %>%
  dplyr::select("Blind_ID", "Time", "Time_c", "PartSession",  "List", "Item", "Prime.verb", "Target.verb", "Prime.Type", "Verb.match", "RESPONSE.CODE", "Prime", "Overlap", "bias", "biasmatch", "biasmismatchc", "Trial_Problem", "COVID.Zoom.session")

DOD_54mo<- DOD_54mo_prepped %>%
  mutate(
    Time = 1,
    Time_c = "54",
    PartSession = paste(Blind_ID, Time_c, sep="")
  ) %>%
  dplyr::select("Blind_ID", "Time", "Time_c", "PartSession", "List", "Item", "Prime.verb", "Target.verb", "Prime.Type", "Verb.match", "RESPONSE.CODE", "Prime", "Overlap", "bias", "biasmatch", "biasmismatchc", "Trial_Problem", "COVID.Zoom.session")

Attach DOD Prod variable from first session – to model exclusively trajectory of participants who produced DODs during the first session.

Combined = bind_rows(DOD_42mo, DOD_48mo, DOD_54mo)

Combined <- DOD_42mo_prepped %>%
  group_by(Blind_ID) %>%
  dplyr::select("Blind_ID", "Prod_DOD") %>%
  slice(1) %>%
  full_join(Combined, ., by = "Blind_ID")

Combined$Time_sum <- factor(Combined$Time_c, levels=c("54", "48", "42"))
contrasts(Combined$Time_sum) = contr.sum(3) 

contrasts(Combined$Time_sum)

##    [,1] [,2]
## 54    1    0
## 48    0    1
## 42   -1   -1

Plot effects by time point–to see if priming effect seems to decrease and lexical boost seems to increase by age.

cbPalette <- c("#000000",  "#56B4E9", "#E69F00","#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

(p0 <- Combined %>% 
  group_by(Prime, Overlap, Time_c) %>%
  summarise(mean = mean(RESPONSE.CODE, na.rm=TRUE), 
            se = std.error(RESPONSE.CODE, na.rm=TRUE)) %>%
  ggplot(aes(y=mean, x=Overlap, fill=Prime, ymin=mean, ymax=mean+se)) +
  geom_bar(stat="identity",position="dodge") +
 geom_errorbar(width=.4,
                 position=position_dodge(.9)) +
  facet_wrap(~Time_c) +
  ylim(0, .4) + 
  theme_minimal() + 
  labs(y="Raw Mean") +
  scale_fill_manual(values=cbPalette)
)

## `summarise()` has grouped output by 'Prime', 'Overlap'. You can override using
## the `.groups` argument.

2 Abstract Priming/Lexical Boost

2.1 Compare two random effects structures

Truncated gaussian with SD of 2 on random effect SDs. Keeps 95% of probability < 4 (roughly). A random effect SD greater than that is a bit wild on the logit scale.

logit_prior = prior(normal(0,2), class=sd)

Model with nested random effects: ID by Time.

mlong0 <- brm(RESPONSE.CODE ~ 1 + Time_sum +  (1 | Blind_ID:Time_c) + (1 + Time_sum | Blind_ID) + (1 | Target.verb), control=list(adapt_delta = .90),  family="bernoulli", prior=logit_prior, iter=4000, cores=4, data=Combined,seed=12345, file="LONG/output/mlong0.rds")
mlong0 <- add_criterion(mlong0, "loo")

summary(mlong0, prob=.9)

##  Family: bernoulli 
##   Links: mu = logit 
## Formula: RESPONSE.CODE ~ 1 + Time_sum + (1 | Blind_ID:Time_c) + (1 + Time_sum | Blind_ID) + (1 | Target.verb) 
##    Data: Combined (Number of observations: 2865) 
##   Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
##          total post-warmup draws = 8000
## 
## Group-Level Effects: 
## ~Blind_ID (Number of levels: 100) 
##                          Estimate Est.Error l-90% CI u-90% CI Rhat Bulk_ESS
## sd(Intercept)                0.92      0.25     0.50     1.30 1.01      401
## sd(Time_sum1)                0.36      0.27     0.03     0.90 1.01      451
## sd(Time_sum2)                0.68      0.37     0.09     1.30 1.01      337
## cor(Intercept,Time_sum1)     0.06      0.46    -0.73     0.80 1.00     3021
## cor(Intercept,Time_sum2)     0.14      0.38    -0.52     0.75 1.00     1761
## cor(Time_sum1,Time_sum2)    -0.02      0.47    -0.77     0.78 1.00      770
##                          Tail_ESS
## sd(Intercept)                 413
## sd(Time_sum1)                 285
## sd(Time_sum2)                 405
## cor(Intercept,Time_sum1)     4174
## cor(Intercept,Time_sum2)     2475
## cor(Time_sum1,Time_sum2)     2032
## 
## ~Blind_ID:Time_c (Number of levels: 261) 
##               Estimate Est.Error l-90% CI u-90% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.30      0.29     0.83     1.69 1.01      290      179
## 
## ~Target.verb (Number of levels: 6) 
##               Estimate Est.Error l-90% CI u-90% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.45      0.54     0.82     2.49 1.00     2606     4353
## 
## Population-Level Effects: 
##           Estimate Est.Error l-90% CI u-90% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -2.90      0.64    -3.88    -1.81 1.00     1898     2848
## Time_sum1    -0.19      0.18    -0.49     0.10 1.00     3788     5792
## Time_sum2     0.03      0.20    -0.29     0.35 1.00     3187     5101
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Random effects by participant, with no additional dependence between trials in a given session.

mlong0b <- brm(RESPONSE.CODE ~ 1 +  Time_sum + (1  +  Time_sum  | Blind_ID) + (1 | Target.verb), control=list(adapt_delta=.9), family="bernoulli", prior=logit_prior, iter=4000, cores=4, data=Combined,seed=12345, file="LONG/output/mlong0b.rds")
mlong0b <- add_criterion(mlong0b, "loo")

summary(mlong0b)

##  Family: bernoulli 
##   Links: mu = logit 
## Formula: RESPONSE.CODE ~ 1 + Time_sum + (1 + Time_sum | Blind_ID) + (1 | Target.verb) 
##    Data: Combined (Number of observations: 2865) 
##   Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
##          total post-warmup draws = 8000
## 
## Group-Level Effects: 
## ~Blind_ID (Number of levels: 100) 
##                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)                1.25      0.15     0.98     1.57 1.00     3114
## sd(Time_sum1)                1.05      0.18     0.74     1.42 1.00     3386
## sd(Time_sum2)                1.35      0.20     1.00     1.77 1.00     2634
## cor(Intercept,Time_sum1)     0.05      0.20    -0.33     0.43 1.00     3566
## cor(Intercept,Time_sum2)     0.07      0.18    -0.29     0.41 1.00     2185
## cor(Time_sum1,Time_sum2)    -0.33      0.17    -0.62     0.04 1.00     1710
##                          Tail_ESS
## sd(Intercept)                4964
## sd(Time_sum1)                5557
## sd(Time_sum2)                4966
## cor(Intercept,Time_sum1)     5288
## cor(Intercept,Time_sum2)     3792
## cor(Time_sum1,Time_sum2)     3028
## 
## ~Target.verb (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.45      0.53     0.75     2.81 1.00     3771     4924
## 
## Population-Level Effects: 
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -2.92      0.64    -4.13    -1.54 1.00     2804     4004
## Time_sum1    -0.17      0.19    -0.55     0.19 1.00     5688     5704
## Time_sum2     0.01      0.21    -0.42     0.42 1.00     4635     5225
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

#loo1 <- loo(mlong0)
#loo2 <- loo(mlong0b)
#loo3 <- reloo(mlong0b, loo2)
#model_weights(mlong0, mlong0b)

#loo_compare(loo3, loo1)

Model with within session random effects fits numerically better (though less than 1 SE elpd diff = -.4, SE = .9), but given the extra complexity, and the redundancy (slopes for time point are indistinguishable from time-point specific intercepts), I think it’s best to keep the simpler model. Additionally, I’ve run into several convergence issues with the full model.

2.2 Full data

2.2.1 Sum coded

Just to remember how everything is coded:

contrasts(Combined$Time_sum)

##    [,1] [,2]
## 54    1    0
## 48    0    1
## 42   -1   -1

table(Combined$Prime.Type, Combined$Prime)

##       
##         DOD  POD
##   -0.5    0 1446
##   0.5  1419    0

table(Combined$Overlap)

## 
## No Overlap    Overlap 
##       1355       1510

A problem with the linear model is that it makes a pretty strict assumption about the trajectory (that it’s equal between 54 and 48 and 48 and 42 months). Here I treat time categorically, using effects coding (-1, 1).

mlong_eff <- brm(RESPONSE.CODE ~ 1 + Time_sum*Prime.Type*Overlap + 
            (1 + Time_sum*Prime.Type*Overlap | Blind_ID ) +
            (1 + Time_sum*Prime.Type*Overlap   | Target.verb), 
          family="bernoulli", 
          prior=logit_prior, 
          iter=4000, 
          cores=4,
          seed=12345,
          data=Combined, 
          file="LONG/output/mlong_eff.rds")

mlong_eff <-  add_criterion(mlong_eff, "loo")

Trace plots (commented for space)

#plot(mlong_eff, N = 4)

Look at residuals

mlong_eff.model.check <- createDHARMa(
  simulatedResponse = t(posterior_predict(mlong_eff)), 
  observedResponse = Combined$RESPONSE.CODE, 
  fittedPredictedResponse = apply(t(posterior_epred(mlong_eff)), 1, mean), 
  integerResponse=TRUE
)

plot(mlong_eff.model.check) # standard plots

testDispersion(mlong_eff.model.check)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.72622, p-value < 2.2e-16
## alternative hypothesis: two.sided

plotResiduals(mlong_eff.model.check, form=Combined$Time_sum)

Again, we have underdispersion and a very weak trend in the residuals vs fitted values.

Get coefficients (only printed fixed effects here for space, see Markdown files for random effects)

summary(mlong_eff, prob=.9)$fixed

##                                       Estimate Est.Error   l-90% CI   u-90% CI
## Intercept                           -3.0312660 0.8549478 -4.3746539 -1.5607215
## Time_sum1                           -0.2588566 0.3250338 -0.7983414  0.2638931
## Time_sum2                            0.2205978 0.2939442 -0.2567553  0.7013722
## Prime.Type                           0.6169287 0.3222499  0.1184692  1.1507760
## OverlapOverlap                      -1.0010990 0.4065189 -1.6457270 -0.3560187
## Time_sum1:Prime.Type                 0.3110120 0.4416843 -0.4129051  1.0125459
## Time_sum2:Prime.Type                -0.1122013 0.3734383 -0.7151050  0.4973341
## Time_sum1:OverlapOverlap             0.4907467 0.5380950 -0.3187667  1.3899418
## Time_sum2:OverlapOverlap            -0.3767525 0.3283955 -0.8896421  0.1531646
## Prime.Type:OverlapOverlap            1.4776504 0.6357077  0.5139654  2.5608167
## Time_sum1:Prime.Type:OverlapOverlap -0.7430864 0.7786027 -2.0139687  0.5137010
## Time_sum2:Prime.Type:OverlapOverlap  0.6076502 0.6294585 -0.3864828  1.6744396
##                                         Rhat Bulk_ESS Tail_ESS
## Intercept                           1.002178 1565.885 2575.088
## Time_sum1                           1.001019 3584.575 4365.895
## Time_sum2                           1.000316 3110.602 4430.052
## Prime.Type                          1.001338 4001.858 4124.043
## OverlapOverlap                      1.001268 2930.794 3730.949
## Time_sum1:Prime.Type                1.000433 4192.804 4172.082
## Time_sum2:Prime.Type                1.000117 5369.447 5099.234
## Time_sum1:OverlapOverlap            1.002213 3673.954 4272.106
## Time_sum2:OverlapOverlap            1.000692 4590.521 4671.631
## Prime.Type:OverlapOverlap           1.000313 4235.138 4170.005
## Time_sum1:Prime.Type:OverlapOverlap 1.000526 3890.403 4308.942
## Time_sum2:Prime.Type:OverlapOverlap 1.001048 4471.004 4526.723

hypothesis(mlong_eff, "Prime.Type > 0")

## Hypothesis Tests for class b:
##         Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (Prime.Type) > 0     0.62      0.32     0.12     1.15      44.98      0.98
##   Star
## 1    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong_eff, "Time_sum1:Prime.Type < 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum1:Prime.... < 0     0.31      0.44    -0.41     1.01       0.29
##   Post.Prob Star
## 1      0.22     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong_eff, "Time_sum2:Prime.Type < 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum2:Prime.... < 0    -0.11      0.37    -0.72      0.5       1.66
##   Post.Prob Star
## 1      0.62     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong_eff, "Prime.Type:OverlapOverlap > 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Prime.Type:Overl... > 0     1.48      0.64     0.51     2.56      102.9
##   Post.Prob Star
## 1      0.99    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong_eff, "Time_sum1:Prime.Type:OverlapOverlap > 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum1:Prime.... > 0    -0.74      0.78    -2.01     0.51       0.18
##   Post.Prob Star
## 1      0.15     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong_eff, "Time_sum2:Prime.Type:OverlapOverlap > 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum2:Prime.... > 0     0.61      0.63    -0.39     1.67       5.45
##   Post.Prob Star
## 1      0.84     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

Plot, predicted values vs data

p0<- p0 + theme(axis.title.x = element_blank()) 
p2 <- fitted(mlong_eff, re_formula=NA, scale="response") %>%
  cbind(Combined) %>%
  dplyr::select(Prime, Overlap, Time_c, Estimate, Est.Error) %>%
  ggplot(aes(y=Estimate, x=Overlap, fill=Prime )) +
  geom_bar(stat="identity",position="dodge") + 
  geom_errorbar(aes(ymin=Estimate, ymax=Estimate+Est.Error), width=.2,
                 position=position_dodge(.9)) + 
  facet_wrap(~Time_c) +
  theme_minimal() + 
  ylim(0, .4) + 
  labs(y="Predicted Probability") + 
  scale_fill_manual(values=cbPalette)


p0/p2 + plot_layout(guides = "collect")

ggsave("plots/Fig5.png", last_plot(), dpi=500)

## Saving 7 x 5 in image

Get the time point specific effects (abstract priming effect, lexical boost and priming effect with lexical overlap)

# First make dummy data set with fixed effects only, to get posterior samples for each crossing of Time x Prime Structure x Overlap
df <- tibble( # Design matrix (minus intercept which is automatic)
  Overlap = factor(rep(c(0, 0, 1, 1), 3), labels = c("No Overlap", "Overlap")), 
  Prime.Type = rep(c(-.5, .5, -.5, .5), 3), 
  Time_sum = c(rep(54, 4), rep(48, 4), rep(42, 4))
)

# Getting samples on logit scale
ps_means <- posterior_linpred(mlong_eff, newdata=df, re_formula=NA) %>%
  t() %>% 
  as_tibble() %>%
  bind_cols(df, .) %>% 
  pivot_longer(starts_with("V"), names_to="sample", values_to="linpred") %>%
  mutate( # some re-coding to make things more readable later
    Time_c = ifelse(Time_sum==54, yes="54", no = 
                      ifelse(Time_sum==48, yes="48", no = "42")),
    Prime.Type = ifelse(Prime.Type == .5, yes="DOD", no = "POD"), 
    Overlap = ifelse(Overlap=="Overlap", yes="Ov", no = "No")
  ) %>%
  dplyr::select(-"Time_sum") %>%
  pivot_wider(names_from=c("Overlap", "Prime.Type", "Time_c"), values_from="linpred") %>%
  mutate( # calculate time-point specific effects
    Abstract_42 = No_DOD_42 - No_POD_42,
    LexOver_42 = Ov_DOD_42 - Ov_POD_42, 
    Abstract_48 = No_DOD_48 - No_POD_48,
    LexOver_48 = Ov_DOD_48 - Ov_POD_48, 
    Abstract_54 = No_DOD_54 - No_POD_54,
    LexOver_54= Ov_DOD_54 - Ov_POD_54, 
    LB_42 = LexOver_42 - Abstract_42, 
    LB_48 = LexOver_48 - Abstract_48, 
    LB_54 = LexOver_54 - Abstract_54 
  ) %>%
  dplyr::select("sample", starts_with(c("Abs", "Lex", "LB"))) %>%
  pivot_longer(
    starts_with(c("Abs", "Lex", "LB")), 
    names_to = c("Effect", "Time"), 
    names_sep = "_", 
    values_to = "Logits"
  ) %>%
  mutate(
    Effect = ifelse(Effect=="Abstract", yes = Effect, no = ifelse(
                      Effect=="LexOver", yes = "Overlap", no = "Lexical Boost")), 
    Time = paste(Time, "months"), 
    Condition = paste(Time, Effect)
  )

## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ps_means_p <- ps_means %>%
  group_by(Condition) %>%
  summarise(
    P = sum(Logits > 0)/8000 # calculate proportion of samples above 0 for each effect
  )


p1 <- ps_means %>% # plot priming effects with and without overlap
   mutate( # re-level conditions so that 42 months is at the top of the figure. 
    Condition = factor(Condition, levels = c("54 months Lexical Boost",  "54 months Overlap",  "54 months Abstract", 
                                             "48 months Lexical Boost", "48 months Overlap", "48 months Abstract",
                                              "42 months Lexical Boost", "42 months Overlap", "42 months Abstract")
  )) %>%
  filter(Effect != "Lexical Boost") %>%
ggplot(aes(y=Condition, x=Logits)) + stat_halfeye(.width=c(.66, .90)) + geom_vline(xintercept = 0) + theme_minimal() + 
  annotate("text", x=7, y=6.15, label= paste("p =", round(ps_means_p[1, 2], 2))) + 
   annotate("text", x=7, y=5.15, label= paste("p =", round(ps_means_p[3, 2], 2))) + 
    annotate("text", x=7, y=4.15, label= paste("p =", round(ps_means_p[4, 2], 2))) + 
   annotate("text", x=7, y=3.15, label= paste("p =", round(ps_means_p[6, 2], 2))) + 
    annotate("text", x=7, y=2.15, label= paste("p =", round(ps_means_p[7, 2], 2))) + 
   annotate("text", x=7, y=1.15, label= paste("p =", round(ps_means_p[9, 2], 2)))  +
    theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank()) 
 

p2 <- ps_means %>% # plot lexical boosts
  mutate( # re-level conditions so that 42 months is at the top of the figure. 
    Condition = factor(Condition, levels = c("54 months Lexical Boost",  "54 months Overlap",  "54 months Abstract", 
                                             "48 months Lexical Boost", "48 months Overlap", "48 months Abstract",
                                              "42 months Lexical Boost", "42 months Overlap", "42 months Abstract")
  )) %>%
  filter(Effect == "Lexical Boost") %>%
  ggplot(aes(y=Condition,  x=Logits)) + stat_halfeyeh(.width=c(.66, .90)) + geom_vline(xintercept = 0) + theme_minimal() +
  annotate("text", x=7, y=3.15, label= paste("p =", round(ps_means_p[2, 2], 2))) + 
  annotate("text", x=7, y=2.15, label= paste("p =", round(ps_means_p[5, 2], 2))) + 
  annotate("text", x=7, y=1.15, label= paste("p =", round(ps_means_p[8, 2], 2))) + 
  labs(y="Effect") +
  theme(axis.title.y=element_blank())

## Warning: 'stat_halfeyeh' is deprecated.
## Use 'stat_halfeye' instead.
## See help("Deprecated") and help("tidybayes-deprecated").

(p1 / p2) + labs(y = "Effect")

ggsave("plots/Fig6.png", last_plot(), dpi=500)

## Saving 7 x 5 in image

I have also run some sensitivity analyses, which were suppressed from the R Markdown file. These tested whether (a) dropping participants who completed the 54 month sesison on Zoom, (b) dropping the mis-administered trial, (c) forcing uncorrelated random effects or (d) dropping trials with high pareto’s K changed the results. Results were qualitatively similar to those above. For space reasons I have suppressed these from the html file, but see R markdown file associated with this project for more information.

2.3 Only kids who produced a DOD at the first time point

Include only participants who produced a DOD at first time point.

This model gave some divergent transitions and low effective sample sizes, so I up-ed the adapt delta and added some more iterations.

Combined2 <- filter(Combined, Prod_DOD > 0)
mlong2_eff <- brm(RESPONSE.CODE ~ 1 + Time_sum*Prime.Type*Overlap + 
            (1 + Time_sum*Prime.Type*Overlap | Blind_ID) +
            (1 + Time_sum*Prime.Type*Overlap   | Target.verb), 
          family="bernoulli", 
          prior=logit_prior, 
          iter=5000, 
          cores=4,
          seed=12345,
          control=list(adapt_delta=.95),
          data=Combined2, 
          file="LONG/output/mlong2_eff.rds")

mlong2_eff <- add_criterion(mlong2_eff, "loo")

#plot(mlong2_eff, N= 4)

mlong2_eff.model.check <- createDHARMa(
  simulatedResponse = t(posterior_predict(mlong2_eff)), 
  observedResponse = Combined2$RESPONSE.CODE, 
  fittedPredictedResponse = apply(t(posterior_epred(mlong2_eff)), 1, mean), 
  integerResponse=TRUE
)

plot(mlong2_eff.model.check) # standard plots

## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details

testDispersion(mlong2_eff.model.check)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.69737, p-value < 2.2e-16
## alternative hypothesis: two.sided

plotResiduals(mlong2_eff.model.check, form=Combined2$Time_sum)

Significant difference in residual variances across time (in particular, more undersdispersion at last time point)

summary(mlong2_eff)$fixed

##                                        Estimate Est.Error   l-95% CI   u-95% CI
## Intercept                           -2.32645220 0.8865980 -4.0093346 -0.4661804
## Time_sum1                           -0.54971387 0.4097927 -1.3945221  0.2514227
## Time_sum2                           -0.40248303 0.3498083 -1.1153815  0.2636639
## Prime.Type                           0.60058708 0.4235028 -0.1891931  1.4798969
## OverlapOverlap                      -1.14608547 0.4856274 -2.1975009 -0.2684393
## Time_sum1:Prime.Type                -0.02866171 0.5074409 -1.0549500  0.9552859
## Time_sum2:Prime.Type                 0.13921543 0.4862987 -0.7977964  1.0992696
## Time_sum1:OverlapOverlap             0.11120742 0.6589775 -1.1802128  1.4536143
## Time_sum2:OverlapOverlap            -0.13708739 0.4972115 -1.1188479  0.8382416
## Prime.Type:OverlapOverlap            2.07772073 0.9192851  0.3587913  4.0255821
## Time_sum1:Prime.Type:OverlapOverlap -0.13108920 0.8362947 -1.8189339  1.4981539
## Time_sum2:Prime.Type:OverlapOverlap  0.06320939 0.8265207 -1.4994080  1.7488969
##                                          Rhat Bulk_ESS Tail_ESS
## Intercept                           1.0004938 3007.830 5223.710
## Time_sum1                           0.9999707 6487.111 5534.679
## Time_sum2                           1.0005988 6004.676 5817.865
## Prime.Type                          1.0004425 7504.172 7502.811
## OverlapOverlap                      1.0009754 5138.638 6875.435
## Time_sum1:Prime.Type                1.0002606 8222.877 6744.853
## Time_sum2:Prime.Type                1.0004784 8079.467 6860.411
## Time_sum1:OverlapOverlap            1.0003730 5704.203 6016.118
## Time_sum2:OverlapOverlap            1.0001887 7425.760 5664.096
## Prime.Type:OverlapOverlap           1.0001290 6623.562 6533.937
## Time_sum1:Prime.Type:OverlapOverlap 0.9999754 8265.991 7087.933
## Time_sum2:Prime.Type:OverlapOverlap 0.9999875 7074.946 6908.085

hypothesis(mlong2_eff, "Prime.Type > 0")

## Hypothesis Tests for class b:
##         Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (Prime.Type) > 0      0.6      0.42    -0.05     1.29      14.65      0.94
##   Star
## 1     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong2_eff, "Time_sum1:Prime.Type > 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum1:Prime.... > 0    -0.03      0.51    -0.87     0.79        0.9
##   Post.Prob Star
## 1      0.47     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong2_eff, "Time_sum2:Prime.Type > 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum2:Prime.... > 0     0.14      0.49    -0.64     0.93        1.6
##   Post.Prob Star
## 1      0.62     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong2_eff, "Prime.Type:OverlapOverlap > 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Prime.Type:Overl... > 0     2.08      0.92     0.66     3.64      88.29
##   Post.Prob Star
## 1      0.99    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong2_eff, "Time_sum1:Prime.Type:OverlapOverlap > 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum1:Prime.... > 0    -0.13      0.84    -1.51     1.21       0.77
##   Post.Prob Star
## 1      0.43     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong2_eff, "Time_sum2:Prime.Type:OverlapOverlap > 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum2:Prime.... > 0     0.06      0.83    -1.25     1.43       1.09
##   Post.Prob Star
## 1      0.52     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

Check if conditional effects differ.

df <- tibble( # Design matrix (minus intercept which is automatic)
  Overlap = factor(rep(c(0, 0, 1, 1), 3), labels = c("No Overlap", "Overlap")), 
  Prime.Type = rep(c(-.5, .5, -.5, .5), 3), 
  Time_sum = c(rep(54, 4), rep(48, 4), rep(42, 4))
)

ps_means <- posterior_linpred(mlong2_eff, newdata=df, re_formula=NA) %>%
  t() %>% # posterior samples of linear predictor.
  as_tibble() %>%
  bind_cols(df, .) %>%
  pivot_longer(starts_with("V"), names_to="sample", values_to="linpred") %>%
  mutate(
    Time_c = ifelse(Time_sum==54, yes="54", no = 
                      ifelse(Time_sum==48, yes="48", no = "42")),
    Prime.Type = ifelse(Prime.Type == .5, yes="DOD", no = "POD"), 
    Overlap = ifelse(Overlap=="Overlap", yes="Ov", no = "No")
  ) %>%
  dplyr::select(-"Time_sum") %>%
  pivot_wider(names_from=c("Overlap", "Prime.Type", "Time_c"), values_from="linpred") %>%
  mutate(
    Abstract_42 = No_DOD_42 - No_POD_42,
    LexOver_42 = Ov_DOD_42 - Ov_POD_42, 
    Abstract_48 = No_DOD_48 - No_POD_48,
    LexOver_48 = Ov_DOD_48 - Ov_POD_48, 
    Abstract_54 = No_DOD_54 - No_POD_54,
    LexOver_54= Ov_DOD_54- Ov_POD_54, 
    LB_42 = LexOver_42 - Abstract_42, 
    LB_48 = LexOver_48 - Abstract_48, 
    LB_54 = LexOver_54 - Abstract_54 
  ) %>%
  dplyr::select("sample", starts_with(c("Abs", "Lex", "LB"))) %>%
  pivot_longer(
    starts_with(c("Abs", "Lex", "LB")), 
    names_to = c("Effect", "Time"), 
    names_sep = "_", 
    values_to = "Logits"
  ) %>%
  mutate(
    Effect = ifelse(Effect=="Abstract", yes = Effect, no = ifelse(
                      Effect=="LexOver", yes = "Overlap", no = "Lexical Boost")), 
    Time = paste(Time, "months"), 
    Condition = paste(Time, Effect)
  ) 


ps_means_p <- ps_means %>%
  group_by(Condition) %>%
  summarise(
    P = sum(Logits > 0)/10000 # increased denominator here because more iterations in intial model. 
  )

p1 <- ps_means %>% # plot priming effects with and without overlap
   mutate( # re-level conditions so that 42 months is at the top of the figure. 
    Condition = factor(Condition, levels = c("54 months Lexical Boost",  "54 months Overlap",  "54 months Abstract", 
                                             "48 months Lexical Boost", "48 months Overlap", "48 months Abstract",
                                              "42 months Lexical Boost", "42 months Overlap", "42 months Abstract")
  )) %>%
  filter(Effect != "Lexical Boost") %>%
ggplot(aes(y=Condition, x=Logits)) + stat_halfeye(.width=c(.66, .90)) + geom_vline(xintercept = 0) + theme_minimal() + 
  annotate("text", x=7, y=6.15, label= paste("p =", round(ps_means_p[1, 2], 2))) + 
   annotate("text", x=7, y=5.15, label= paste("p =", round(ps_means_p[3, 2], 2))) + 
    annotate("text", x=7, y=4.15, label= paste("p =", round(ps_means_p[4, 2], 2))) + 
   annotate("text", x=7, y=3.15, label= paste("p =", round(ps_means_p[6, 2], 2))) + 
    annotate("text", x=7, y=2.15, label= paste("p =", round(ps_means_p[7, 2], 2))) + 
   annotate("text", x=7, y=1.15, label= paste("p =", round(ps_means_p[9, 2], 2)))  +
    theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank()) 
 

p2 <- ps_means %>% # plot lexical boosts
  mutate( # re-level conditions so that 42 months is at the top of the figure. 
    Condition = factor(Condition, levels = c("54 months Lexical Boost",  "54 months Overlap",  "54 months Abstract", 
                                             "48 months Lexical Boost", "48 months Overlap", "48 months Abstract",
                                              "42 months Lexical Boost", "42 months Overlap", "42 months Abstract")
  )) %>%
  filter(Effect == "Lexical Boost") %>%
  ggplot(aes(y=Condition,  x=Logits)) + stat_halfeyeh(.width=c(.66, .90)) + geom_vline(xintercept = 0) + theme_minimal() +
  annotate("text", x=7, y=3.15, label= paste("p =", round(ps_means_p[2, 2], 2))) + 
  annotate("text", x=7, y=2.15, label= paste("p =", round(ps_means_p[5, 2], 2))) + 
  annotate("text", x=7, y=1.15, label= paste("p =", round(ps_means_p[8, 2], 2))) + 
  labs(y="Effect") +
  theme(axis.title.y=element_blank())

## Warning: 'stat_halfeyeh' is deprecated.
## Use 'stat_halfeye' instead.
## See help("Deprecated") and help("tidybayes-deprecated").

(p1 / p2) + labs(y = "Effect")

ggsave("plots/Fig7.png", last_plot(), dpi=500)

## Saving 7 x 5 in image

I’ve conducted the same sensitivity analyses as before, but excluded them from the html file.

# Drop participants who completed 54 month session on zoom
mlong2_effa <- Combined2 %>%
  filter(is.na(COVID.Zoom.session)) %>%
  brm(RESPONSE.CODE ~ 1 + Time_sum*Prime.Type*Overlap + 
            (1 + Time_sum*Prime.Type*Overlap| Blind_ID ) +
            (1 + Time_sum*Prime.Type*Overlap | Target.verb), 
          family="bernoulli", 
          iter=2000,
          cores=4, 
          prior=logit_prior,
          seed=12345,
          data=., 
          file="LONG/output/mlong2_effa.rds")

hypothesis(mlong2_effa, "Prime.Type > 0")
hypothesis(mlong2_effa, "Time_sum1:Prime.Type < 0")
hypothesis(mlong2_effa, "Time_sum2:Prime.Type < 0")
hypothesis(mlong2_effa, "Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effa, "Time_sum1:Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effa, "Time_sum2:Prime.Type:OverlapOverlap > 0")

# Drop problematic trial
mlong2_effb <- Combined2 %>%
  filter(Trial_Problem == 0) %>%
 brm(RESPONSE.CODE ~ 1 + Time_sum*Prime.Type*Overlap + 
                (1 + Time_sum*Prime.Type*Overlap| Blind_ID ) +
            (1 + Time_sum*Prime.Type*Overlap | Target.verb), 
          family="bernoulli", 
          iter=2000,
          cores=4, 
          prior=logit_prior,
          seed=12345,
          data=., 
          file="LONG/output/mlong2_effb.rds")

hypothesis(mlong2_effb, "Prime.Type > 0")
hypothesis(mlong2_effb, "Time_sum1:Prime.Type < 0")
hypothesis(mlong2_effb, "Time_sum2:Prime.Type < 0")
hypothesis(mlong2_effb, "Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effb, "Time_sum1:Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effb, "Time_sum2:Prime.Type:OverlapOverlap > 0")


# uncorrelated random effects
mlong2_effc <-  brm(RESPONSE.CODE ~ 1 + Time_sum*Prime.Type*Overlap + 
                (1 + Time_sum*Prime.Type*Overlap|| Blind_ID ) +
                (1 + Time_sum*Prime.Type*Overlap || Target.verb), 
          family="bernoulli", 
          prior=logit_prior, 
          iter=2000, 
          cores=4, 
          seed=12345,
           data=Combined2, 
          file="LONG/output/mlong2_effc.rds")
hypothesis(mlong2_effc, "Prime.Type > 0")
hypothesis(mlong2_effc, "Time_sum1:Prime.Type < 0")
hypothesis(mlong2_effc, "Time_sum2:Prime.Type < 0")
hypothesis(mlong2_effc, "Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effc, "Time_sum1:Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effc, "Time_sum2:Prime.Type:OverlapOverlap > 0")


# drop high pareto's k. 
mlong2_effd <-  Combined2 %>%
  mutate(
    k = mlong2_eff$criteria$loo$diagnostics$pareto_k
  ) %>%
  filter(k < .7) %>%
brm(RESPONSE.CODE ~ 1 + Time_sum*Prime.Type*Overlap + 
                (1 + Time_sum*Prime.Type*Overlap| Blind_ID ) +
            (1 + Time_sum*Prime.Type*Overlap | Target.verb), 
          family="bernoulli", 
          prior=logit_prior, 
          iter=2000, 
          cores=4,
          seed=12345,
          data=., 
          file="LONG/output/mlong_effd.rds")
hypothesis(mlong2_effd, "Prime.Type > 0")
hypothesis(mlong2_effd, "Time_sum1:Prime.Type < 0")
hypothesis(mlong2_effd, "Time_sum2:Prime.Type < 0")
hypothesis(mlong2_effd, "Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effd, "Time_sum1:Prime.Type:OverlapOverlap > 0")
hypothesis(mlong2_effd, "Time_sum2:Prime.Type:OverlapOverlap > 0")

3 Prime Surprisal Effect

3.1 Full Data

mlong5_eff <- brm(RESPONSE.CODE ~ 1 + Time_sum*+Prime.Type*biasmatch + 
            (1 + Time_sum*Prime.Type*biasmatch | Blind_ID ) +
            (1 + Time_sum*Prime.Type*biasmatch   | Target.verb), 
          family="bernoulli", 
          prior=logit_prior, 
          iter=4000, 
          seed=12345,
          cores=4, 
          data=Combined %>% filter(Overlap=="No Overlap"), 
          file="LONG/output/mlong5_eff.rds")
mlong5_eff <- add_criterion(mlong5_eff, "loo")

Some of the chains on random effects don’t look well mixed (though R Hats are all <= 1.01). I will try simplifying the random effects structure as well.

#plot(mlong5_eff, N=4)

mlong5_eff.model.check <- createDHARMa(
  simulatedResponse = t(posterior_predict(mlong5_eff)), 
  observedResponse = filter(Combined, Overlap=="No Overlap")$RESPONSE.CODE, 
  fittedPredictedResponse = apply(t(posterior_epred(mlong5_eff)), 1, mean), 
  integerResponse=TRUE
)

plot(mlong5_eff.model.check) # standard plots

testDispersion(mlong5_eff.model.check)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.59289, p-value < 2.2e-16
## alternative hypothesis: two.sided

plotResiduals(mlong5_eff.model.check, form=filter(Combined, Overlap=="No Overlap")$Time_sum)

Stronger pattern of heteroskedasticity (very large residuals at the high predicted values.)

summary(mlong5_eff)$fixed

##                                           Estimate Est.Error   l-95% CI
## Intercept                              -2.89006787 0.9291285 -4.6978773
## Time_sum1                              -0.27433667 0.4024637 -1.0459088
## Time_sum2                               0.17029651 0.3796431 -0.5669265
## Prime.Type                              0.76053534 0.5863900 -0.4032202
## biasmatchmismatch                      -1.12795385 0.5332996 -2.2571572
## Time_sum1:Prime.Type                    0.16927541 0.6440303 -1.1258569
## Time_sum2:Prime.Type                   -0.40591784 0.6014875 -1.6270601
## Time_sum1:biasmatchmismatch             0.21305375 0.5986301 -0.9640565
## Time_sum2:biasmatchmismatch            -0.04044774 0.6056338 -1.2683294
## Prime.Type:biasmatchmismatch            0.47346771 0.9311701 -1.2731436
## Time_sum1:Prime.Type:biasmatchmismatch  0.31074103 1.0176411 -1.7283992
## Time_sum2:Prime.Type:biasmatchmismatch  0.59458758 0.9586195 -1.3098092
##                                          u-95% CI     Rhat Bulk_ESS Tail_ESS
## Intercept                              -0.9639915 1.000903 2313.193 3835.871
## Time_sum1                               0.5414602 1.000443 4966.715 5024.343
## Time_sum2                               0.9302729 1.000437 4674.957 5094.940
## Prime.Type                              1.9755630 1.000294 4624.824 3804.005
## biasmatchmismatch                      -0.1492602 1.001312 3568.747 4865.744
## Time_sum1:Prime.Type                    1.4299039 1.000202 5365.598 5384.595
## Time_sum2:Prime.Type                    0.7518175 1.000551 5671.926 5318.146
## Time_sum1:biasmatchmismatch             1.4098920 1.000180 4498.873 5032.982
## Time_sum2:biasmatchmismatch             1.1347019 1.000038 4646.067 5880.978
## Prime.Type:biasmatchmismatch            2.4753917 1.000511 4734.724 3754.676
## Time_sum1:Prime.Type:biasmatchmismatch  2.3428495 1.000791 5353.698 5694.695
## Time_sum2:Prime.Type:biasmatchmismatch  2.5108849 1.000553 5215.156 5170.872

hypothesis(mlong5_eff, "Prime.Type > 0")

## Hypothesis Tests for class b:
##         Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (Prime.Type) > 0     0.76      0.59    -0.18     1.72      11.44      0.92
##   Star
## 1     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong5_eff, "Prime.Type:biasmatchmismatch  > 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Prime.Type:biasm... > 0     0.47      0.93    -0.93     2.05       2.29
##   Post.Prob Star
## 1       0.7     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong5_eff, "Time_sum1:Prime.Type:biasmatchmismatch < 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum1:Prime.... < 0     0.31      1.02    -1.38     1.96       0.59
##   Post.Prob Star
## 1      0.37     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

hypothesis(mlong5_eff, "Time_sum2:Prime.Type:biasmatchmismatch < 0")

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (Time_sum2:Prime.... < 0     0.59      0.96    -0.96     2.18       0.36
##   Post.Prob Star
## 1      0.26     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

Get Empirical Means and Predicted Values

(p2 <- Combined %>% 
  filter(Overlap=="No Overlap") %>% 
  group_by(Prime, biasmatch, Time_c) %>%
  summarise(mean = mean(RESPONSE.CODE, na.rm=TRUE), 
             se = std.error(RESPONSE.CODE, na.rm=TRUE)) %>%
  
ggplot(aes(y=mean, x=biasmatch, fill=Prime)) +
  geom_bar(stat="identity",position="dodge") +
 geom_errorbar(aes(ymin=mean, ymax=mean+se), width=.2,
                 position=position_dodge(.9)) +
  facet_wrap(~Time_c) +
  ylim(0, .4) + 
  theme_minimal() + 
  labs(y="Raw Mean") + 
  scale_fill_manual(values=cbPalette) +
  theme(axis.title.x = element_blank())
)

## `summarise()` has grouped output by 'Prime', 'biasmatch'. You can override
## using the `.groups` argument.

p5 <- fitted(mlong5_eff, re_formula=NA, scale="response") %>%
  cbind(Combined %>% filter(Overlap=="No Overlap")) %>%
  dplyr::select(Prime, biasmatch, Time_c, Estimate, Est.Error) %>%
  ggplot(aes(y=Estimate, x=biasmatch, fill=Prime)) +
  geom_bar(stat="identity",position="dodge") + 
  geom_errorbar(aes(ymin=Estimate, ymax=Estimate+Est.Error), width=.2,
                 position=position_dodge(.9)) + 
  facet_wrap(~Time_c) +
  theme_minimal() + 
  ylim(0, .4) + 
  labs(y="Predicted Probability") + 
  scale_fill_manual(values=cbPalette) +
  theme(strip.text = element_blank())

p2/p5 + plot_layout(guides = "collect")

ggsave("plots/Fig8.png", last_plot(), dpi=500)

## Saving 7 x 5 in image

df <- tibble( # Design matrix (minus intercept which is automatic)
  biasmatch = factor(rep(c(0, 0, 1, 1), 3), labels = c("match", "mismatch")), 
  Prime.Type = rep(c(-.5, .5, -.5, .5), 3), 
  Time_sum = c(rep(54, 4), rep(48, 4), rep(42, 4))
)

ps_means <- posterior_linpred(mlong5_eff, newdata=df, re_formula=NA) %>%
  t() %>% # posterior samples of linear predictor.
  as_tibble() %>%
  bind_cols(df, .) %>%
  pivot_longer(starts_with("V"), names_to="sample", values_to="linpred") %>%
  mutate(
    Time_c = ifelse(Time_sum==54, yes="54", no = 
                      ifelse(Time_sum==48, yes="48", no = "42")),
    Prime.Type = ifelse(Prime.Type == .5, yes="DOD", no = "POD"), 
    biasmatch = ifelse(biasmatch=="mismatch", yes="mm", no = "m")
  ) %>%
  dplyr::select(-"Time_sum") %>%
  pivot_wider(names_from=c("biasmatch", "Prime.Type", "Time_c"), values_from="linpred") %>%
  mutate(
    Match_42 = m_DOD_42 - m_POD_42,
    Mismatch_42 = mm_DOD_42 - mm_POD_42, 
    Match_48 = m_DOD_48 - m_POD_48,
    Mismatch_48 = mm_DOD_48 - mm_POD_48, 
    Match_54 = m_DOD_54 - m_POD_54,
    Mismatch_54= mm_DOD_54- mm_POD_54, 
    Surprisal_42 = Mismatch_42 - Match_42, 
    Surprisal_48 = Mismatch_48 - Match_48, 
    Surprisal_54 = Mismatch_54 - Match_54 
  ) %>%
  dplyr::select("sample", starts_with(c("Match", "Mismatch", "Surprisal"))) %>%
  pivot_longer(
    starts_with(c("Match", "Mismatch", "Surprisal")), 
    names_to = c("Effect", "Time"), 
    names_sep = "_", 
    values_to = "Logits"
  ) %>%
  mutate(
    Time = paste(Time, "months"), 
    Condition = paste(Time, Effect)
  ) 


ps_means_p <- ps_means %>%
  group_by(Condition) %>%
  summarise(
    P = sum(Logits > 0)/8000
  )

p1 <- ps_means %>%
  mutate( # re-level conditions so that 42 months is at the top of the figure. 
    Condition = factor(Condition, levels = c("54 months Surprisal",  "54 months Mismatch",  "54 months Match", 
                                             "48 months Surprisal", "48 months Mismatch", "48 months Match",
                                              "42 months Surprisal", "42 months Mismatch", "42 months Match")
  )) %>%
  filter(Effect != "Surprisal") %>%
ggplot(aes(y=Condition, x=Logits)) + stat_halfeye(.width=c(.66, .90)) + geom_vline(xintercept = 0) + theme_minimal() + 
  annotate("text", x=7, y=6.15, label= paste("p =", round(ps_means_p[1, 2], 2))) + 
   annotate("text", x=7, y=5.15, label= paste("p =", round(ps_means_p[2, 2], 2))) + 
    annotate("text", x=7, y=4.15, label= paste("p =", round(ps_means_p[4, 2], 2))) + 
   annotate("text", x=7, y=3.15, label= paste("p =", round(ps_means_p[5, 2], 2))) + 
    annotate("text", x=7, y=2.15, label= paste("p =", round(ps_means_p[7, 2], 2))) + 
   annotate("text", x=7, y=1.15, label= paste("p =", round(ps_means_p[8, 2], 2)))  +
    theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank()) 
 

p2 <- ps_means %>%
   mutate( # re-level conditions so that 42 months is at the top of the figure. 
    Condition = factor(Condition, levels = c("54 months Surprisal",  "54 months Mismatch",  "54 months Match", 
                                             "48 months Surprisal", "48 months Mismatch", "48 months Match",
                                              "42 months Surprisal", "42 months Mismatch", "42 months Match")
  )) %>%
  filter(Effect == "Surprisal") %>%
  ggplot(aes(y=Condition,  x=Logits)) + stat_halfeye(.width=c(.66, .90)) + geom_vline(xintercept = 0) + theme_minimal() +
  annotate("text", x=7, y=3.15, label= paste("p =", round(ps_means_p[3, 2], 2))) + 
  annotate("text", x=7, y=2.15, label= paste("p =", round(ps_means_p[6, 2], 2))) + 
  annotate("text", x=7, y=1.15, label= paste("p =", round(ps_means_p[9, 2], 2))) + 
  labs(y="Effect") +
  theme(axis.title.y=element_blank())

(p1 / p2) + labs(y = "Effect")

ggsave("plots/Fig9.png", last_plot(), dpi=500)

## Saving 7 x 5 in image

3.2 Only DOD Producers

mlong6_eff <- brm(RESPONSE.CODE ~ 1 + Time_sum*+Prime.Type*biasmatch + 
            (1 + Time_sum*Prime.Type*biasmatch | Blind_ID ) +
            (1 + Time_sum*Prime.Type*biasmatch   | Target.verb), 
          family="bernoulli", 
          prior=logit_prior, 
          iter=4000, 
          cores=4, 
          seed=12345,
          data=Combined2 %>% filter(Overlap=="No Overlap"), 
          file="LONG/output/mlong6_eff.rds")
mlong6_eff <- add_criterion(mlong6_eff, "loo")

#plot(mlong6_eff, N=4)

summary(mlong6_eff)$fixed

##                                           Estimate Est.Error   l-95% CI
## Intercept                              -2.04198662 0.9607526 -3.9211299
## Time_sum1                              -0.56895570 0.5159499 -1.5944660
## Time_sum2                              -0.37584735 0.4960739 -1.4155226
## Prime.Type                              0.85514397 0.6426347 -0.3921886
## biasmatchmismatch                      -1.16440316 0.5874601 -2.3894977
## Time_sum1:Prime.Type                   -0.17634321 0.7539536 -1.6828224
## Time_sum2:Prime.Type                   -0.40251753 0.7297878 -1.8989986
## Time_sum1:biasmatchmismatch             0.06573790 0.6919852 -1.3045572
## Time_sum2:biasmatchmismatch            -0.19903300 0.7791245 -1.8935938
## Prime.Type:biasmatchmismatch            0.28351026 1.1024708 -1.7758178
## Time_sum1:Prime.Type:biasmatchmismatch  0.09232731 1.1876217 -2.1999529
## Time_sum2:Prime.Type:biasmatchmismatch  1.06777574 1.2045903 -1.2227177
##                                           u-95% CI      Rhat Bulk_ESS Tail_ESS
## Intercept                              -0.08826107 1.0032450 2133.332 3581.345
## Time_sum1                               0.44332460 1.0010475 3894.898 4482.010
## Time_sum2                               0.56384912 1.0006848 3755.285 4152.618
## Prime.Type                              2.13830049 1.0002501 5123.351 5021.889
## biasmatchmismatch                      -0.05450468 1.0003625 4112.826 5478.828
## Time_sum1:Prime.Type                    1.33269773 1.0017485 4781.497 5196.944
## Time_sum2:Prime.Type                    0.97987603 1.0002816 5291.555 5632.706
## Time_sum1:biasmatchmismatch             1.40673645 0.9997446 3986.095 4945.762
## Time_sum2:biasmatchmismatch             1.22678031 1.0007165 3514.734 4410.186
## Prime.Type:biasmatchmismatch            2.60669211 1.0008242 5341.215 4987.847
## Time_sum1:Prime.Type:biasmatchmismatch  2.42617355 1.0002407 5154.651 5255.388
## Time_sum2:Prime.Type:biasmatchmismatch  3.43906197 1.0010375 4830.974 5145.186

mlong6_eff.model.check <- createDHARMa(
  simulatedResponse = t(posterior_predict(mlong6_eff)), 
  observedResponse = filter(Combined2, Overlap=="No Overlap")$RESPONSE.CODE, 
  fittedPredictedResponse = apply(t(posterior_epred(mlong6_eff)), 1, mean), 
  integerResponse=TRUE
)

plot(mlong6_eff.model.check) # standard plots

testDispersion(mlong6_eff.model.check)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.64014, p-value < 2.2e-16
## alternative hypothesis: two.sided

plotResiduals(mlong6_eff.model.check, form=filter(Combined2, Overlap=="No Overlap")$Time_sum)