Exp1: ESegmentation Pilot Analysis

——————————-Exclude Participant——————————-

all_raw_data <- all_raw_data %>%
  filter(run_id != "38")

————————————-Clean Data————————————-

clean_data_block1 <- clean_data %>%
  filter(block == 1)
confidence_data <- clean_data_block1 %>%
  filter(!is.na(confidence_rating)) %>%
  select(run_id, stimulus_name, predictability, confidence_rating)

confidence_data %>%
  count(confidence_rating, sort = TRUE) %>%
  mutate(percent = round(100 * n / sum(n), 2))
## # A tibble: 5 × 3
##   confidence_rating        n percent
##   <chr>                <int>   <dbl>
## 1 Moderately confident   177   42.8 
## 2 Very confident          89   21.5 
## 3 Neutral                 70   16.9 
## 4 Slightly unconfident    62   15.0 
## 5 Very unconfident        16    3.86
# Count Very unconfident
confidence_data %>%
  filter(confidence_rating == "Very unconfident") %>%
  nrow()
## [1] 16
# 1. Print attention-failure summary before removing anything
print(
  clean_data_block1 %>%
    count(video_attention_failure_type)
)
## # A tibble: 1 × 2
##   video_attention_failure_type     n
##   <chr>                        <int>
## 1 <NA>                           828
# 2. Remove trials with attention failures
clean_data_attn_clean <- clean_data_block1 %>%
  filter(is.na(video_attention_failure_type) | video_attention_failure_type == "")

# 3. Add row id after attention-failure removal
clean_data2 <- clean_data_attn_clean %>%
  mutate(row_id = row_number())

# 4. Find rows containing "Very unconfident"
very_unconf_rows <- clean_data2 %>%
  filter(confidence_rating == "Very unconfident") %>%
  pull(row_id)

# 5. Remove those confidence-rating rows AND the row immediately before them
clean_data2 <- clean_data2 %>%
  filter(!(row_id %in% c(very_unconf_rows, very_unconf_rows - 1))) %>%
  select(-row_id)
segmentation_data <- clean_data2 %>%
  filter(trial_kind == "segmentation_video") %>%
  select(
    -any_of(c(
      "trial_type", "time_elapsed", "PROLIFIC_PID", "trial_index", "trial_kind", "pair_number"))
  )
dim(segmentation_data)
## [1] 398  13

———————————Descriptive Data———————————

Participant level

participant_mean <- segmentation_data %>%
  mutate(boundary_count_num = readr::parse_number(boundary_count)) %>%
  group_by(run_id) %>%
  summarise(mean_NoB = mean(boundary_count_num, na.rm = TRUE),.groups = "drop")

grand_mean <- mean(participant_mean$mean_NoB)
ground_truth_mean <- 13.64

ggplot(participant_mean, aes(x = mean_NoB)) +
  geom_histogram(binwidth = 1, color = "black", fill = "skyblue") +
  geom_vline(xintercept = grand_mean, linetype = "dashed", linewidth = 1.2, color = "red") +
  annotate("text", x = grand_mean, y = Inf, label = paste0("Participants' Mean = ", round(grand_mean, 2)), color = "red", vjust = 1.5, hjust = 0.6, size = 4) +
  labs(title = "Distribution of Participants' Average Number of Boundaries", x = "Average Number of Boundaries per Video", y = "Number of Participants") +
  theme_minimal(base_size = 14) +
  theme(plot.title = element_text(face = "bold")
  )

Average NoB

button_count_table <- segmentation_data %>%
  mutate(boundary_count = as.numeric(boundary_count)) %>%
  group_by(stimulus_name, predictability) %>%
  summarise(mean_button_count = mean(boundary_count, na.rm = TRUE), n_trials = n(),.groups = "drop") %>%
  pivot_wider(id_cols = stimulus_name, names_from = predictability, values_from = mean_button_count) %>%
  mutate(Difference_UP = Unpredictable - Predictable) %>%
  arrange(desc(Difference_UP))

button_count_long <- segmentation_data %>%
  mutate(boundary_count = as.numeric(boundary_count)) %>%
  group_by(stimulus_name, predictability) %>%
  summarise(mean_button_count = mean(boundary_count, na.rm = TRUE), n_trials = n(),.groups = "drop")

ggplot(button_count_long, aes(x = predictability, y = mean_button_count, group = stimulus_name)) +
  geom_line(alpha = 0.4, color = "grey60") +
  geom_point(aes(color = predictability), size = 2.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", linewidth = 1.2, color = "black") +
  stat_summary(aes(group = 1), fun = mean, geom = "point", size = 3.5, color = "black") +
  labs(x = NULL, y = "Mean NoB", title = "Mean NoB Across Predictability Conditions") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none", plot.title = element_text(face = "bold"), plot.subtitle = element_text(color = "grey40"))

—————————–Mixed Effect Regression—————————–

MEM for the effect of Predictability on NoB

  • After accounting for individual differences in segmentation tendencies and differences among videos, does predictability affect the average number of boundaries?
segmentation_data <- segmentation_data %>%
  mutate(boundary_count = as.numeric(boundary_count))

MEM_mean_Gaussian <- lmer(boundary_count ~ predictability + (1 | run_id) + (1 | stimulus_name), data = segmentation_data)

summary(MEM_mean_Gaussian)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: boundary_count ~ predictability + (1 | run_id) + (1 | stimulus_name)
##    Data: segmentation_data
## 
## REML criterion at convergence: 1327.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5122 -0.5833  0.0007  0.5211  3.4958 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.9785   0.9892  
##  run_id        (Intercept) 2.5360   1.5925  
##  Residual                  1.1856   1.0889  
## Number of obs: 398, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)                   4.4237     0.4697  18.2779   9.419 1.94e-08 ***
## predictabilityUnpredictable   0.1863     0.1134 357.3725   1.643    0.101    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## prdctbltyUn -0.132
anova(MEM_mean_Gaussian)
## Type III Analysis of Variance Table with Satterthwaite's method
##                Sum Sq Mean Sq NumDF  DenDF F value Pr(>F)
## predictability 3.1998  3.1998     1 357.37  2.6988 0.1013
pred <- ggpredict(MEM_mean_Gaussian, terms = "predictability")
ggplot(pred, aes(x = x, y = predicted)) + geom_point(size = 3) + geom_line(aes(group = 1), linewidth = 1) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1, linewidth = 0.8) +
  labs(x = "Predictability", y = "Predicted mean boundary count", title = "Mixed-effects Model Predictions") +
  theme_minimal(base_size = 14)

### MEM with Poisson Distribution

model_pois <- glmer(boundary_count ~ predictability + (1 | run_id) + (1 | stimulus_name), family = poisson,
data = segmentation_data)

summary(model_pois)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: boundary_count ~ predictability + (1 | run_id) + (1 | stimulus_name)
##    Data: segmentation_data
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    1516.3    1532.2    -754.1    1508.3       394 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.72822 -0.32129 -0.04172  0.26558  1.63957 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.0373   0.1931  
##  run_id        (Intercept) 0.1098   0.3314  
## Number of obs: 398, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  1.41610    0.10230   13.84   <2e-16 ***
## predictabilityUnpredictable  0.04116    0.04898    0.84    0.401    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## prdctbltyUn -0.264
pred_pois <- ggpredict(model_pois, terms = "predictability")
## You are calculating adjusted predictions on the population-level (i.e.
##   `type = "fixed"`) for a *generalized* linear mixed model.
##   This may produce biased estimates due to Jensen's inequality. Consider
##   setting `bias_correction = TRUE` to correct for this bias.
##   See also the documentation of the `bias_correction` argument.
ggplot(pred_pois, aes(x = x, y = predicted)) + geom_point(size = 3) + geom_line(aes(group = 1), linewidth = 1) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1, linewidth = 0.8) +
  labs(x = "Predictability", y = "Predicted mean boundary count", title = "Poisson Mixed-effects Model Predictions")+
  theme_minimal(base_size = 14)

overdispersion_ratio <-
    sum(residuals(model_pois, type = "pearson")^2) / df.residual(model_pois)
overdispersion_ratio
## [1] 0.2428319

——————-Analyzsis on Pre/Critical/Post——————-

Interval Analysis: 3 Seperate Regression

window_counts <- clean_data %>%
  filter(trial_kind == "segmentation_video") %>%
  select(run_id, stimulus_name, predictability, boundary_times_sec) %>%
  left_join(critical_windows, by = "stimulus_name") %>%
  left_join(video_duration, by = "stimulus_name") %>% rowwise() %>%
  mutate(
    boundary_times = list(jsonlite::fromJSON(boundary_times_sec)),
    pre_count = sum(boundary_times >= 0 & boundary_times < critical_start),
    critical_count = sum(boundary_times >= critical_start & boundary_times <= critical_end),
    post_count = sum(boundary_times > critical_end & boundary_times <= video_duration_sec)) %>%
  ungroup() %>%
  mutate(predictability = factor(predictability), run_id = factor(run_id), stimulus_name = factor(stimulus_name))

model_pre <- lmer(pre_count ~ predictability + (1 | run_id) + (1 | stimulus_name), data = window_counts)
model_critical <- lmer(critical_count ~ predictability + (1 | run_id) + (1 | stimulus_name), data = window_counts)
model_post <- lmer(post_count ~ predictability + (1 | run_id) + (1 | stimulus_name), data = window_counts)

summary(model_pre)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: pre_count ~ predictability + (1 | run_id) + (1 | stimulus_name)
##    Data: window_counts
## 
## REML criterion at convergence: 2098
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8271 -0.5986 -0.0260  0.5526  4.7852 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.4732   0.6879  
##  run_id        (Intercept) 0.6177   0.7859  
##  Residual                  0.6305   0.7941  
## Number of obs: 820, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                 1.891e+00  2.478e-01 2.295e+01   7.628 9.76e-08 ***
## predictabilityUnpredictable 7.763e-03  5.549e-02 7.761e+02   0.140    0.889    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## prdctbltyUn -0.112
summary(model_critical)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: critical_count ~ predictability + (1 | run_id) + (1 | stimulus_name)
##    Data: window_counts
## 
## REML criterion at convergence: 1482.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0713 -0.6845 -0.0376  0.6520  6.3505 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.13793  0.3714  
##  run_id        (Intercept) 0.07269  0.2696  
##  Residual                  0.30891  0.5558  
## Number of obs: 820, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                   0.66233    0.10268  34.29174   6.451 2.17e-07 ***
## predictabilityUnpredictable   0.04392    0.03884 776.06592   1.131    0.258    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## prdctbltyUn -0.189
summary(model_post)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: post_count ~ predictability + (1 | run_id) + (1 | stimulus_name)
##    Data: window_counts
## 
## REML criterion at convergence: 2126.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9520 -0.5874 -0.0382  0.6044  5.5076 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.4324   0.6576  
##  run_id        (Intercept) 0.4811   0.6936  
##  Residual                  0.6592   0.8119  
## Number of obs: 820, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                   2.00600    0.22446  24.61226   8.937 3.38e-09 ***
## predictabilityUnpredictable  -0.04839    0.05674 776.05930  -0.853    0.394    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## prdctbltyUn -0.126
#Plot
window_counts_long <- window_counts %>%
  select(run_id, stimulus_name, predictability, pre_count, critical_count, post_count) %>%
  pivot_longer( cols = c(pre_count, critical_count, post_count), names_to = "window", values_to = "boundary_count" ) %>%
  mutate(
    window = recode( window, pre_count = "Pre-critical", critical_count = "Critical", post_count = "Post-critical"),
    window = factor(window, levels = c("Pre-critical", "Critical", "Post-critical")),
    predictability = factor(predictability, levels = c("Predictable", "Unpredictable")))

window_means <- window_counts_long %>%
  group_by(window, predictability) %>%
  summarise( mean_boundary_count = mean(boundary_count, na.rm = TRUE), se = sd(boundary_count, na.rm = TRUE) / sqrt(n()), .groups = "drop")

ggplot(window_means, aes(x = predictability, y = mean_boundary_count, group = window, color = window)) +
  geom_line(linewidth = 1.3) + geom_point(size = 3.5) +
  geom_errorbar(aes(ymin = mean_boundary_count - se, ymax = mean_boundary_count + se), width = 0.08, linewidth = 0.7) +
  scale_color_manual(values = c("Pre-critical" = "#1b9e77", "Critical" = "#d95f02", "Post-critical" = "#7570b3")) +
  labs(x = NULL, y = "Mean boundary count", color = "Video window", title = "Boundary Counts Across Predictability Conditions") +
  theme_minimal(base_size = 14)

Interval Analysis: 1 Regression

# Create counts for each video window
window_counts <- clean_data %>% filter(trial_kind == "segmentation_video") %>%
  select(run_id, stimulus_name, predictability, boundary_times_sec) %>%
  left_join(critical_windows, by = "stimulus_name") %>% left_join(video_duration, by = "stimulus_name") %>% rowwise() %>%
  mutate(boundary_times = list(jsonlite::fromJSON(boundary_times_sec)),
    pre_count = sum(boundary_times >= 0 & boundary_times < critical_start),
    critical_count = sum(boundary_times >= critical_start & boundary_times <= critical_end),
    post_count = sum(boundary_times > critical_end & boundary_times <= video_duration_sec) ) %>%
  ungroup()

# Convert to long format: one row per trial × interval
window_counts_long <- window_counts %>%
  select(run_id, stimulus_name, predictability, pre_count, critical_count, post_count) %>%
  pivot_longer(cols = c(pre_count, critical_count, post_count), names_to = "Interval", values_to = "boundary_count") %>%
  mutate(
    Interval = recode(Interval, pre_count = "Pre-critical", critical_count = "Critical", post_count = "Post-critical"),
    Interval = factor(Interval, levels = c("Pre-critical", "Critical", "Post-critical")),
    predictability = factor(predictability, levels = c("Predictable", "Unpredictable")),
    run_id = factor(run_id), stimulus_name = factor(stimulus_name))

# One mixed-effects model with Interval and predictability as fixed effects
model_interval_int <- lmer(boundary_count ~ predictability * Interval + (1 | run_id) + (1 | stimulus_name), data = window_counts_long)
model_interval_add <- lmer(boundary_count ~ predictability + Interval + (1 | run_id) + (1 | stimulus_name), data = window_counts_long)

summary(model_interval_int)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: boundary_count ~ predictability * Interval + (1 | run_id) + (1 |  
##     stimulus_name)
##    Data: window_counts_long
## 
## REML criterion at convergence: 6723.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8209 -0.6609 -0.0937  0.5387  5.4577 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.09109  0.3018  
##  run_id        (Intercept) 0.31593  0.5621  
##  Residual                  0.85061  0.9223  
## Number of obs: 2460, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                                                     Estimate Std. Error
## (Intercept)                                          1.88539    0.16635
## predictabilityUnpredictable                          0.01299    0.06443
## IntervalCritical                                    -1.22628    0.06434
## IntervalPost-critical                                0.12895    0.06434
## predictabilityUnpredictable:IntervalCritical         0.03557    0.09110
## predictabilityUnpredictable:IntervalPost-critical   -0.07027    0.09110
##                                                           df t value Pr(>|t|)
## (Intercept)                                         18.78034  11.334 7.73e-10
## predictabilityUnpredictable                       2412.07880   0.202   0.8402
## IntervalCritical                                  2411.94645 -19.060  < 2e-16
## IntervalPost-critical                             2411.94645   2.004   0.0451
## predictabilityUnpredictable:IntervalCritical      2411.94645   0.390   0.6962
## predictabilityUnpredictable:IntervalPost-critical 2411.94645  -0.771   0.4405
##                                                      
## (Intercept)                                       ***
## predictabilityUnpredictable                          
## IntervalCritical                                  ***
## IntervalPost-critical                             *  
## predictabilityUnpredictable:IntervalCritical         
## predictabilityUnpredictable:IntervalPost-critical    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) prdctU IntrvC IntrP- prU:IC
## prdctbltyUn -0.193                            
## IntrvlCrtcl -0.193  0.499                     
## IntrvlPst-c -0.193  0.499  0.500              
## prdctblU:IC  0.137 -0.707 -0.706 -0.353       
## prdctbU:IP-  0.137 -0.707 -0.353 -0.706  0.500
summary(model_interval_add)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: boundary_count ~ predictability + Interval + (1 | run_id) + (1 |  
##     stimulus_name)
##    Data: window_counts_long
## 
## REML criterion at convergence: 6719
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7897 -0.6592 -0.0948  0.5406  5.4264 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.09109  0.3018  
##  run_id        (Intercept) 0.31593  0.5621  
##  Residual                  0.85039  0.9222  
## Number of obs: 2460, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                               Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)                  1.891e+00  1.643e-01  1.786e+01  11.512 1.08e-09
## predictabilityUnpredictable  1.423e-03  3.720e-02  2.414e+03   0.038   0.9695
## IntervalCritical            -1.209e+00  4.554e-02  2.414e+03 -26.536  < 2e-16
## IntervalPost-critical        9.390e-02  4.554e-02  2.414e+03   2.062   0.0393
##                                
## (Intercept)                 ***
## predictabilityUnpredictable    
## IntervalCritical            ***
## IntervalPost-critical       *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) prdctU IntrvC
## prdctbltyUn -0.113              
## IntrvlCrtcl -0.139  0.000       
## IntrvlPst-c -0.139  0.000  0.500
model_interval_add_ml <- lmer(boundary_count ~ predictability + Interval + (1 | run_id) + (1 | stimulus_name), data = window_counts_long, REML = FALSE)

model_interval_int_ml <- lmer(boundary_count ~ predictability * Interval + (1 | run_id) + (1 | stimulus_name), data = window_counts_long, REML = FALSE)

anova(model_interval_add_ml, model_interval_int_ml)
## Data: window_counts_long
## Models:
## model_interval_add_ml: boundary_count ~ predictability + Interval + (1 | run_id) + (1 | stimulus_name)
## model_interval_int_ml: boundary_count ~ predictability * Interval + (1 | run_id) + (1 | stimulus_name)
##                       npar    AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)
## model_interval_add_ml    7 6717.4 6758.1 -3351.7    6703.4                     
## model_interval_int_ml    9 6720.0 6772.3 -3351.0    6702.0 1.4008  2     0.4964
pred_Int_add <- ggpredict(model_interval_add, terms = c("predictability", "Interval"))

ggplot(pred_Int_add, aes(x = x, y = predicted, color = group, group = group)) +
  geom_point(size = 3) +
  geom_line(linewidth = 1.2) + geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = .08) +
  labs(x = "Predictability", y = "Predicted boundary count", color = "Interval", title = "Predicted Boundary Count by Predictability and Interval") +
  theme_minimal(base_size = 14)

emm <- emmeans(model_interval_int, ~ predictability * Interval)
## Cannot use mode = "kenward-roger" because *pbkrtest* package is not installed
emm_df <- as.data.frame(emm)

ggplot(emm_df, aes(x = predictability, y = emmean, color = Interval, group = Interval)) +
  geom_point(size = 3) +
  geom_line(linewidth = 1.2) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.08) +
  labs(x = "Predictability", y = "Predicted boundary count", color = "Interval", title = "Predicted Boundary Count by Predictability x Interval") +
  theme_minimal(base_size = 14)