This page contains the analyses of the practice trials; for the analyses of the test trials, click this link.
Load the libraries used throughout these analyses.
library(eyetrackingR)
library(ggplot2)
library(psych)
library(brms)
library(tidyverse)
library(patchwork)
library(GGally)
library(wesanderson)
library(tidybayes)
theme_set(theme_minimal())
Open the data files (output suppressed).
full_2 <- read_csv("SyntacticBootstrappingData/SB_Practice_only.csv") # IMPL data
Demos <- read_csv("SyntacticBootstrappingData/Demos.csv") # Some Demographics
Part_To_Drop <- read_csv("SyntacticBootstrappingData/Part_To_Drop_Practice.csv") # List of participants to drop based on missing data
full_2 <- full_join(full_2, Part_To_Drop, by="ParticipantName")
mean_rt_short <- read_csv("SyntacticBootstrappingData/RT_24mo.csv") # RT
CDI_byverb_short <- read_csv("SyntacticBootstrappingData/CDI_byverb_short.csv")
Demos <- full_join(Demos, mean_rt_short, by="ParticipantName") # full_join keeps all rows from both tables
Create a variable indicating what the distracter item was. This will be used as a control variable.
full_2 <- full_2 %>%
  mutate(
    # each target was paired with a single distracter -- note that this differs from the test trials
    Distracter = case_when(
      Target == "Clean" ~ "Feed",
      Target == "Feed" ~ "Clean",
      Target == "Hug" ~ "Tickle",
      TRUE ~ "Hug"
    )
  )
Check that the distribution is correct.
table(full_2$Target, full_2$Distracter)
##
## Clean Feed Hug Tickle
## Clean 0 134794 0 0
## Feed 139678 0 0 0
## Hug 0 0 0 137769
## Tickle 0 0 134827 0
Drop participants with hearing loss/other developmental delays.
full_2 <- filter(full_2 , ParticipantName != "BI8sFIfv" & ParticipantName != "rqqYhcLZ")
full_2 <- filter(full_2 , ParticipantName != "9oXjRYVw" & ParticipantName != "opeMB1zY")
How many missing windows were there per participant?
table(Part_To_Drop$n_miss)
##
## 0 1 2 3 4
## 81 9 9 4 8
Drop participants with more than 66% trackloss on 2+ eight-second windows.
full_2 <- subset(full_2, n_miss < 2)
n_distinct(full_2$ParticipantName)
## [1] 90
Convert the dataset to eyetrackingR data and add variables for Window and Time Bin. Also, flag problematic trials.
SBdata <- make_eyetrackingr_data(full_2,
participant_column = "ParticipantName",
trial_column = "MediaName",
time_column = "TrialTime",
aoi_columns = c('AOI_Target','AOI_Distracter'),
trackloss_column = "TrackLoss",
treat_non_aoi_looks_as_missing = TRUE
)
## Converting Participants to proper type.
## Converting Trial to proper type.
## Converting Trackloss to proper type.
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
SB_early <- subset_by_window(SBdata,
window_start_time=19000, window_end_time=27000)
## Avg. window length in new data will be 8000
SB_early$Window = 1
SB_late <- subset_by_window(SBdata,
window_start_time=32000, window_end_time=40000)
## Avg. window length in new data will be 8000
SB_late$Window = 2
SB_full <- rbind(SB_early, SB_late)
SB_full$TimeBin <- rep(0, nrow(SB_full))
# assign 2-second time bins within each rezeroed 8-second window (0-2 s remains bin 0)
for(i in 1:nrow(SB_full)){
if(SB_full$TrialTime[i] >= 2000 & SB_full$TrialTime[i] < 4000) SB_full$TimeBin[i] = 1
if(SB_full$TrialTime[i] >= 4000 & SB_full$TrialTime[i] < 6000) SB_full$TimeBin[i] = 2
if(SB_full$TrialTime[i] >= 6000 & SB_full$TrialTime[i] < 8000) SB_full$TimeBin[i] = 3
}
SB_full$problem_window <- ifelse(SB_full$MediaName=="Seq4_Trial1.wmv" & SB_full$Trial==1 &SB_full$Window==1, yes=1, no=0 )
SB_full<- subset(SB_full, problem_window==0) # Removed window with error.
trackloss_analysis(SB_full)
Drop windows with greater than 66% missing data.
SB_full_clean <- clean_by_trackloss(SB_full, trial_prop_thresh=.66)
## Performing Trackloss Analysis...
## Will exclude trials whose trackloss proportion is greater than : 0.66
## ...removed 1 trials.
One additional trial with more than 66% trackloss was removed.
Number of participants who completed each media file.
SB_full_clean %>%
make_time_window_data(summarize_by=c("ParticipantName", "MediaName")) %>%
filter(AOI=="AOI_Target") %>%
group_by(MediaName) %>%
count()
## Analyzing AOI_Target...
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## Warning: `summarise_()` was deprecated in dplyr 0.7.0.
## Please use `summarise()` instead.
## Analyzing AOI_Distracter...
Note that there was an error in the Tobii software, so that Practice_Girl_Left was played in place of Practice_Boy_Right.
First model: intercepts only. Does the overall proportion of looks to the target differ from chance?
m1 <-SB_full_clean %>%
make_time_window_data(summarize_by=c("ParticipantName", "MediaName")) %>%
filter(AOI=="AOI_Target") %>%
data.frame() %>%
brm(Prop ~ 1 + (1 | ParticipantName) + (1 | MediaName), family=Beta, data=.,
file = "models/practice/m1")
summary(m1)
## Family: beta
## Links: mu = logit; phi = identity
## Formula: Prop ~ 1 + (1 | ParticipantName) + (1 | MediaName)
## Data: . (Number of observations: 179)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~MediaName (Number of levels: 15)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.53 0.13 0.34 0.83 1.00 1091 1717
##
## ~ParticipantName (Number of levels: 90)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.21 0.10 0.02 0.38 1.01 594 1164
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.14 0.15 -0.15 0.44 1.00 708 1198
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi 11.32 1.63 8.56 14.84 1.00 1061 2070
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
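For interpretation, the intercept is on the logit scale, so chance-level looking (a proportion of 0.5) corresponds to 0. A quick back-transform of the posterior mean (a sketch, not part of the original analysis):
plogis(fixef(m1)["Intercept", "Estimate"]) # ~0.53, i.e., just above the 0.5 chance level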
Check the hypothesis that overall looking to the target is above chance, i.e., that the intercept is greater than 0 on the logit scale.
hypothesis(m1, "Intercept > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (Intercept) > 0 0.14 0.15 -0.1 0.39 5.16 0.84
## Star
## 1
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
In order to calculate Bayes factors comparing null and alternative models, we need more specific priors. I used the same priors as for the models of the test trial data (below). I’ve excluded most of this output; see the R Markdown file for more details. In general, the Bayes factors were ambiguous (~.4 in favor of the null).
int_prior1 <- prior(normal(.4, .15), class="Intercept")
int_prior2 <- prior(normal(0, .05), class="Intercept")
int_prior3 <- prior(exponential(1), class="Intercept")
I’ve run the Bayes factor function several times per comparison, because it can be unstable if the number of iterations is too small.
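As a rough sketch of what one such comparison could look like (the model names m1_alt and m1_null are placeholders, not the models actually reported; see the R Markdown file for the full code):
practice_window <- SB_full_clean %>%
  make_time_window_data(summarize_by=c("ParticipantName", "MediaName")) %>%
  filter(AOI=="AOI_Target") %>%
  data.frame()
m1_alt <- brm(Prop ~ 1 + (1 | ParticipantName) + (1 | MediaName), family=Beta, data=practice_window,
  prior=int_prior1, save_all_pars=TRUE, iter=10000) # alternative: looks to target above chance
m1_null <- brm(Prop ~ 1 + (1 | ParticipantName) + (1 | MediaName), family=Beta, data=practice_window,
  prior=int_prior2, save_all_pars=TRUE, iter=10000) # null: intercept concentrated near 0
bayes_factor(m1_alt, m1_null) # bridge sampling; rerun a few times to check stability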
Let’s see if some actions get more looks than others, as was the case with the test trials.
SB_full_clean %>%
make_time_window_data(summarize_by=c("ParticipantName", "Target")) %>%
filter(AOI=="AOI_Target") %>%
data.frame() %>%
brm(Prop ~ Target -1 + (0 + Target ||ParticipantName), iter=4000, control=list(adapt_delta=.9, max_treedepth=20), cores=4, save_all_pars=TRUE, family="beta", file="models/practice/mtarget", data=.) %>%
summary()
## Warning: There were 2 divergent transitions after warmup. Increasing adapt_delta
## above 0.9 may help. See http://mc-stan.org/misc/warnings.html#divergent-
## transitions-after-warmup
## Family: beta
## Links: mu = logit; phi = identity
## Formula: Prop ~ Target - 1 + (0 + Target || ParticipantName)
## Data: . (Number of observations: 179)
## Samples: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~ParticipantName (Number of levels: 90)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(TargetClean) 0.46 0.19 0.07 0.80 1.01 335 1126
## sd(TargetFeed) 0.35 0.17 0.03 0.67 1.01 286 547
## sd(TargetHug) 0.27 0.17 0.01 0.59 1.01 280 581
## sd(TargetTickle) 0.62 0.16 0.31 0.93 1.00 482 858
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## TargetClean -0.56 0.11 -0.78 -0.33 1.00 6380 5340
## TargetFeed 0.81 0.09 0.62 0.99 1.00 4719 5902
## TargetHug 0.01 0.08 -0.15 0.18 1.00 7201 3486
## TargetTickle 0.09 0.13 -0.16 0.34 1.00 4380 5425
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi 19.68 13.26 9.76 59.22 1.01 211 232
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Far more looks to Feed than to Clean. We can control for this by calculating, for each action, the proportion of time participants looked at it when it played as the distracter, and adding that as a control variable.
Looks_dis <- SB_full_clean %>%
make_time_window_data(summarize_by=c("ParticipantName", "MediaName"), predictor_columns="Distracter") %>%
filter(AOI=="AOI_Distracter") %>%
group_by(Distracter) %>%
mutate(
Dis_Prop = mean(Prop),
Logit.s = logit(Dis_Prop)
) %>%
slice(1) %>%
select(Distracter, Dis_Prop, Logit.s) %>%
rename(Target=Distracter)
## Analyzing AOI_Target...
## Analyzing AOI_Distracter...
Fit the model again, adding this control variable (Logit.s).
m2 <-SB_full_clean %>%
left_join(., Looks_dis, by="Target") %>%
make_time_window_data(summarize_by=c("ParticipantName", "MediaName"), predictor_columns=c("Target", "Logit.s")) %>%
filter(AOI=="AOI_Target") %>%
data.frame() %>%
brm(Prop ~ Logit.s + (1 + Logit.s|| ParticipantName) + (1 || MediaName), family=Beta, data=.,
file = "models/practice/m2") # remember, logit.s cannot vary within media.
summary(m2)
## Family: beta
## Links: mu = logit; phi = identity
## Formula: Prop ~ Logit.s + (1 + Logit.s || ParticipantName) + (1 || MediaName)
## Data: . (Number of observations: 179)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~MediaName (Number of levels: 15)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.17 0.09 0.02 0.37 1.01 756 1321
##
## ~ParticipantName (Number of levels: 90)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.22 0.10 0.02 0.39 1.00 690 1167
## sd(Logit.s) 0.26 0.17 0.01 0.63 1.01 815 1500
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.17 0.07 0.03 0.31 1.00 3206 2767
## Logit.s 1.00 0.16 0.68 1.29 1.00 3425 2951
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi 11.74 1.89 8.68 16.13 1.00 969 1441
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
hypothesis(m2, "Intercept > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (Intercept) > 0 0.17 0.07 0.05 0.29 80.63 0.99
## Star
## 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
It looks like the proportion of looks to the target is above chance when we control for the proportion of looks to the relevant action when it was the distracter. This was not the case with the test trials.
The correlations between trials and windows were generally not statistically significant in the test trials, making it difficult to fit models of individual differences. In principle, however, we would expect that trials should be significantly correlated.
SB_full_clean %>%
make_time_window_data(summarize_by=c("ParticipantName", "Trial", "Window"), aois="AOI_Target") %>%
select(ParticipantName, Trial, Window, Prop) %>%
mutate(
Window = ifelse(Window==1, yes = "Window1", no ="Window2"),
Trial = ifelse(Trial==2, yes="Trial1", no ="Trial2")
) %>%
pivot_wider(id_cols=ParticipantName, names_from=c(Trial, Window), values_from=Prop) %>%
select(starts_with("Trial")) %>%
ggpairs()
## Warning: Removed 2 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_density).
Let’s try collapsing across windows.
SB_full_clean %>%
make_time_window_data(summarize_by=c("ParticipantName", "Trial"), aois="AOI_Target") %>%
mutate(
Trial = ifelse(Trial==1, yes="Trial1", no="Trial2")
) %>%
select(ParticipantName, Trial, Prop) %>%
pivot_wider(id_cols=ParticipantName, names_from=Trial, values_from=Prop) %>%
select(starts_with("Trial")) %>%
ggpairs()
## Warning: Removed 1 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 rows containing missing values (geom_point).
The two trials are not strongly correlated. Windows, on the other hand, are positively correlated:
SB_full_clean %>%
make_time_window_data(summarize_by=c("ParticipantName", "Window"), aois="AOI_Target") %>%
mutate(
Window = ifelse(Window==1, yes="Window 1", no="Window 2")
) %>%
select(ParticipantName, Window, Prop) %>%
pivot_wider(id_cols=ParticipantName, names_from=Window, values_from=Prop) %>%
select(starts_with("Window")) %>%
ggpairs()
Windows within trials are correlated, but separate trials (i.e., different target verbs) are not.
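As a quick numeric check of the window correlation (a sketch that complements the ggpairs panel above):
window_wide <- SB_full_clean %>%
  make_time_window_data(summarize_by=c("ParticipantName", "Window"), aois="AOI_Target") %>%
  select(ParticipantName, Window, Prop) %>%
  pivot_wider(id_cols=ParticipantName, names_from=Window, values_from=Prop, names_prefix="Window")
cor.test(window_wide$Window1, window_wide$Window2) # Pearson correlation between the two 8-second windows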
Get distribution of performance on practice trials.
SB_full_clean %>%
make_time_window_data(summarize_by=c("ParticipantName")) %>%
filter(AOI=="AOI_Target") %>%
ggplot(aes(x=Prop)) + geom_histogram()
## Analyzing AOI_Target...
## Analyzing AOI_Distracter...
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
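For a numeric summary of the same distribution, psych’s describe() can be applied to the per-participant proportions (a quick sketch):
SB_full_clean %>%
  make_time_window_data(summarize_by=c("ParticipantName")) %>%
  filter(AOI=="AOI_Target") %>%
  pull(Prop) %>%
  describe() # mean, SD, range, and skew of per-participant proportions of looks to target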
We should also look at how performance on the practice trials was predicted by LWL RT and the proportion of verbs a participant reportedly knew.
m9 <-SB_full_clean %>%
left_join(., Looks_dis, by="Target") %>%
left_join(., mean_rt_short, by="ParticipantName") %>%
make_time_window_data(summarize_by=c("ParticipantName", "MediaName"), predictor_columns=c("Logit.s", "RT24" )) %>%
mutate(
RTZ = scale(RT24)
) %>%
filter(AOI=="AOI_Target") %>%
data.frame() %>%
brm(Prop ~ 1 + RTZ + Logit.s + (1 + Logit.s || ParticipantName) + (1 + RTZ || MediaName), family=Beta, data=.,
file = "models/practice/m9")
summary(m9)
## Family: beta
## Links: mu = logit; phi = identity
## Formula: Prop ~ 1 + RTZ + Logit.s + (1 + Logit.s || ParticipantName) + (1 + RTZ || MediaName)
## Data: . (Number of observations: 171)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~MediaName (Number of levels: 15)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.16 0.09 0.01 0.37 1.01 925 1517
## sd(RTZ) 0.08 0.07 0.00 0.24 1.00 1625 2098
##
## ~ParticipantName (Number of levels: 86)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.25 0.10 0.03 0.42 1.01 595 632
## sd(Logit.s) 0.28 0.18 0.01 0.68 1.00 801 1739
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.17 0.08 0.02 0.32 1.00 3322 2430
## RTZ -0.01 0.06 -0.13 0.12 1.00 3478 2913
## Logit.s 1.01 0.16 0.68 1.32 1.00 2845 2139
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi 12.11 2.08 8.74 16.79 1.01 763 1608
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
hypothesis(m9, "RTZ < 0" )
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (RTZ) < 0 -0.01 0.06 -0.11 0.1 1.16 0.54
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
Not much evidence of an effect of LWL RT.
m10 <-SB_full_clean %>%
left_join(., Looks_dis, by="Target") %>%
left_join(., CDI_byverb_short, by="ParticipantName") %>%
make_time_window_data(summarize_by=c("ParticipantName", "MediaName"), predictor_columns = c("Logit.s", "total_verbs")) %>%
mutate(
verb_prop = total_verbs/max(total_verbs, na.rm=TRUE)
) %>%
filter(AOI=="AOI_Target") %>%
data.frame() %>%
brm(Prop ~ 1 + verb_prop+ Logit.s + (1 + Logit.s || ParticipantName) + (1 + verb_prop || MediaName), family=Beta, data=.,
file = "models/practice/m10")
summary(m10)
## Family: beta
## Links: mu = logit; phi = identity
## Formula: Prop ~ 1 + verb_prop + Logit.s + (1 + Logit.s || ParticipantName) + (1 + verb_prop || MediaName)
## Data: . (Number of observations: 173)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~MediaName (Number of levels: 15)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.14 0.09 0.01 0.34 1.00 974 1385
## sd(verb_prop) 0.14 0.11 0.01 0.40 1.00 1881 2024
##
## ~ParticipantName (Number of levels: 87)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.21 0.10 0.02 0.39 1.00 673 1134
## sd(Logit.s) 0.33 0.20 0.01 0.74 1.01 547 1217
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.05 0.11 -0.27 0.17 1.00 3345 2926
## verb_prop 0.42 0.19 0.06 0.81 1.00 2660 1584
## Logit.s 1.02 0.16 0.70 1.33 1.00 3076 2834
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## phi 12.17 2.21 8.82 17.51 1.00 779 808
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
hypothesis(m10, "verb_prop > 0")
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (verb_prop) > 0 0.42 0.19 0.12 0.74 84.11 0.99
## Star
## 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
Stronger evidence of an effect of the proportion of verbs reported as known: more verbs, more looks to the target.
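One way to visualize this effect (a sketch, not part of the original analysis) is brms’ conditional_effects(); in older brms versions the equivalent function is marginal_effects():
conditional_effects(m10, effects="verb_prop") # predicted proportion of looks to target across the observed range of verb_prop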