Exp1: ESegmentation Pilot Analysis

——————————-Exclude Participant——————————-

all_raw_data <- all_raw_data %>%
  filter(run_id != "38")

————————————-Clean Data————————————-

clean_data_block1 <- clean_data %>%
  filter(block == 1)

confidence_data <- clean_data_block1 %>%
  filter(!is.na(confidence_rating)) %>%
  select(run_id, stimulus_name, predictability, confidence_rating)

confidence_data %>%
  count(confidence_rating, sort = TRUE) %>%
  mutate(percent = round(100 * n / sum(n), 2))

## # A tibble: 5 × 3
##   confidence_rating        n percent
##   <chr>                <int>   <dbl>
## 1 Moderately confident   177   42.8 
## 2 Very confident          89   21.5 
## 3 Neutral                 70   16.9 
## 4 Slightly unconfident    62   15.0 
## 5 Very unconfident        16    3.86

# Count Very unconfident
confidence_data %>%
  filter(confidence_rating == "Very unconfident") %>%
  nrow()

## [1] 16

# 1. Print attention-failure summary before removing anything
print(
  clean_data_block1 %>%
    count(video_attention_failure_type)
)

## # A tibble: 1 × 2
##   video_attention_failure_type     n
##   <chr>                        <int>
## 1 <NA>                           828

# 2. Remove trials with attention failures
clean_data_attn_clean <- clean_data_block1 %>%
  filter(is.na(video_attention_failure_type) | video_attention_failure_type == "")

# 3. Add row id after attention-failure removal
clean_data2 <- clean_data_attn_clean %>%
  mutate(row_id = row_number())

# 4. Find rows containing "Very unconfident"
very_unconf_rows <- clean_data2 %>%
  filter(confidence_rating == "Very unconfident") %>%
  pull(row_id)

# 5. Remove those confidence-rating rows AND the row immediately before them
clean_data2 <- clean_data2 %>%
  filter(!(row_id %in% c(very_unconf_rows, very_unconf_rows - 1))) %>%
  select(-row_id)

segmentation_data <- clean_data2 %>%
  filter(trial_kind == "segmentation_video") %>%
  select(
    -any_of(c(
      "trial_type", "time_elapsed", "PROLIFIC_PID", "trial_index", "trial_kind", "pair_number"))
  )
dim(segmentation_data)

## [1] 398  13

———————————Descriptive Data———————————

Participant level

participant_mean <- segmentation_data %>%
  mutate(boundary_count_num = readr::parse_number(boundary_count)) %>%
  group_by(run_id) %>%
  summarise(mean_NoB = mean(boundary_count_num, na.rm = TRUE),.groups = "drop")

grand_mean <- mean(participant_mean$mean_NoB)
ground_truth_mean <- 13.64

ggplot(participant_mean, aes(x = mean_NoB)) +
  geom_histogram(binwidth = 1, color = "black", fill = "skyblue") +
  geom_vline(xintercept = grand_mean, linetype = "dashed", linewidth = 1.2, color = "red") +
  annotate("text", x = grand_mean, y = Inf, label = paste0("Participants' Mean = ", round(grand_mean, 2)), color = "red", vjust = 1.5, hjust = 0.6, size = 4) +
  labs(title = "Distribution of Participants' Average Number of Boundaries", x = "Average Number of Boundaries per Video", y = "Number of Participants") +
  theme_minimal(base_size = 14) +
  theme(plot.title = element_text(face = "bold")
  )

Average NoB

button_count_table <- segmentation_data %>%
  mutate(boundary_count = as.numeric(boundary_count)) %>%
  group_by(stimulus_name, predictability) %>%
  summarise(mean_button_count = mean(boundary_count, na.rm = TRUE), n_trials = n(),.groups = "drop") %>%
  pivot_wider(id_cols = stimulus_name, names_from = predictability, values_from = mean_button_count) %>%
  mutate(Difference_UP = Unpredictable - Predictable) %>%
  arrange(desc(Difference_UP))

button_count_long <- segmentation_data %>%
  mutate(boundary_count = as.numeric(boundary_count)) %>%
  group_by(stimulus_name, predictability) %>%
  summarise(mean_button_count = mean(boundary_count, na.rm = TRUE), n_trials = n(),.groups = "drop")

ggplot(button_count_long, aes(x = predictability, y = mean_button_count, group = stimulus_name)) +
  geom_line(alpha = 0.4, color = "grey60") +
  geom_point(aes(color = predictability), size = 2.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", linewidth = 1.2, color = "black") +
  stat_summary(aes(group = 1), fun = mean, geom = "point", size = 3.5, color = "black") +
  labs(x = NULL, y = "Mean NoB", title = "Mean NoB Across Predictability Conditions") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none", plot.title = element_text(face = "bold"), plot.subtitle = element_text(color = "grey40"))

—————————–Mixed Effect Regression—————————–

MEM for the effect of Predictability on NoB

After accounting for individual differences in segmentation tendencies and differences among videos, does predictability affect the average number of boundaries?

segmentation_data <- segmentation_data %>%
  mutate(boundary_count = as.numeric(boundary_count))

MEM_mean_Gaussian <- lmer(boundary_count ~ predictability + (1 | run_id) + (1 | stimulus_name), data = segmentation_data)

summary(MEM_mean_Gaussian)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: boundary_count ~ predictability + (1 | run_id) + (1 | stimulus_name)
##    Data: segmentation_data
## 
## REML criterion at convergence: 1327.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5122 -0.5833  0.0007  0.5211  3.4958 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.9785   0.9892  
##  run_id        (Intercept) 2.5360   1.5925  
##  Residual                  1.1856   1.0889  
## Number of obs: 398, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)                   4.4237     0.4697  18.2779   9.419 1.94e-08 ***
## predictabilityUnpredictable   0.1863     0.1134 357.3725   1.643    0.101    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## prdctbltyUn -0.132

anova(MEM_mean_Gaussian)

## Type III Analysis of Variance Table with Satterthwaite's method
##                Sum Sq Mean Sq NumDF  DenDF F value Pr(>F)
## predictability 3.1998  3.1998     1 357.37  2.6988 0.1013

pred <- ggpredict(MEM_mean_Gaussian, terms = "predictability")
ggplot(pred, aes(x = x, y = predicted)) + geom_point(size = 3) + geom_line(aes(group = 1), linewidth = 1) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1, linewidth = 0.8) +
  labs(x = "Predictability", y = "Predicted mean boundary count", title = "Mixed-effects Model Predictions") +
  theme_minimal(base_size = 14)

### MEM with Poisson Distribution

model_pois <- glmer(boundary_count ~ predictability + (1 | run_id) + (1 | stimulus_name), family = poisson,
data = segmentation_data)

summary(model_pois)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: boundary_count ~ predictability + (1 | run_id) + (1 | stimulus_name)
##    Data: segmentation_data
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    1516.3    1532.2    -754.1    1508.3       394 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.72822 -0.32129 -0.04172  0.26558  1.63957 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.0373   0.1931  
##  run_id        (Intercept) 0.1098   0.3314  
## Number of obs: 398, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  1.41610    0.10230   13.84   <2e-16 ***
## predictabilityUnpredictable  0.04116    0.04898    0.84    0.401    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## prdctbltyUn -0.264

pred_pois <- ggpredict(model_pois, terms = "predictability")

## You are calculating adjusted predictions on the population-level (i.e.
##   `type = "fixed"`) for a *generalized* linear mixed model.
##   This may produce biased estimates due to Jensen's inequality. Consider
##   setting `bias_correction = TRUE` to correct for this bias.
##   See also the documentation of the `bias_correction` argument.

ggplot(pred_pois, aes(x = x, y = predicted)) + geom_point(size = 3) + geom_line(aes(group = 1), linewidth = 1) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1, linewidth = 0.8) +
  labs(x = "Predictability", y = "Predicted mean boundary count", title = "Poisson Mixed-effects Model Predictions")+
  theme_minimal(base_size = 14)

overdispersion_ratio <-
    sum(residuals(model_pois, type = "pearson")^2) / df.residual(model_pois)
overdispersion_ratio

## [1] 0.2428319

——————-Analyzsis on Pre/Critical/Post——————-

Interval Analysis: 3 Seperate Regression

window_counts <- clean_data %>%
  filter(trial_kind == "segmentation_video") %>%
  select(run_id, stimulus_name, predictability, boundary_times_sec) %>%
  left_join(critical_windows, by = "stimulus_name") %>%
  left_join(video_duration, by = "stimulus_name") %>% rowwise() %>%
  mutate(
    boundary_times = list(jsonlite::fromJSON(boundary_times_sec)),
    pre_count = sum(boundary_times >= 0 & boundary_times < critical_start),
    critical_count = sum(boundary_times >= critical_start & boundary_times <= critical_end),
    post_count = sum(boundary_times > critical_end & boundary_times <= video_duration_sec)) %>%
  ungroup() %>%
  mutate(predictability = factor(predictability), run_id = factor(run_id), stimulus_name = factor(stimulus_name))

model_pre <- lmer(pre_count ~ predictability + (1 | run_id) + (1 | stimulus_name), data = window_counts)
model_critical <- lmer(critical_count ~ predictability + (1 | run_id) + (1 | stimulus_name), data = window_counts)
model_post <- lmer(post_count ~ predictability + (1 | run_id) + (1 | stimulus_name), data = window_counts)

summary(model_pre)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: pre_count ~ predictability + (1 | run_id) + (1 | stimulus_name)
##    Data: window_counts
## 
## REML criterion at convergence: 2098
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8271 -0.5986 -0.0260  0.5526  4.7852 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.4732   0.6879  
##  run_id        (Intercept) 0.6177   0.7859  
##  Residual                  0.6305   0.7941  
## Number of obs: 820, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                 1.891e+00  2.478e-01 2.295e+01   7.628 9.76e-08 ***
## predictabilityUnpredictable 7.763e-03  5.549e-02 7.761e+02   0.140    0.889    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## prdctbltyUn -0.112

summary(model_critical)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: critical_count ~ predictability + (1 | run_id) + (1 | stimulus_name)
##    Data: window_counts
## 
## REML criterion at convergence: 1482.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0713 -0.6845 -0.0376  0.6520  6.3505 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.13793  0.3714  
##  run_id        (Intercept) 0.07269  0.2696  
##  Residual                  0.30891  0.5558  
## Number of obs: 820, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                   0.66233    0.10268  34.29174   6.451 2.17e-07 ***
## predictabilityUnpredictable   0.04392    0.03884 776.06592   1.131    0.258    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## prdctbltyUn -0.189

summary(model_post)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: post_count ~ predictability + (1 | run_id) + (1 | stimulus_name)
##    Data: window_counts
## 
## REML criterion at convergence: 2126.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9520 -0.5874 -0.0382  0.6044  5.5076 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.4324   0.6576  
##  run_id        (Intercept) 0.4811   0.6936  
##  Residual                  0.6592   0.8119  
## Number of obs: 820, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                   2.00600    0.22446  24.61226   8.937 3.38e-09 ***
## predictabilityUnpredictable  -0.04839    0.05674 776.05930  -0.853    0.394    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## prdctbltyUn -0.126

#Plot
window_counts_long <- window_counts %>%
  select(run_id, stimulus_name, predictability, pre_count, critical_count, post_count) %>%
  pivot_longer( cols = c(pre_count, critical_count, post_count), names_to = "window", values_to = "boundary_count" ) %>%
  mutate(
    window = recode( window, pre_count = "Pre-critical", critical_count = "Critical", post_count = "Post-critical"),
    window = factor(window, levels = c("Pre-critical", "Critical", "Post-critical")),
    predictability = factor(predictability, levels = c("Predictable", "Unpredictable")))

window_means <- window_counts_long %>%
  group_by(window, predictability) %>%
  summarise( mean_boundary_count = mean(boundary_count, na.rm = TRUE), se = sd(boundary_count, na.rm = TRUE) / sqrt(n()), .groups = "drop")

ggplot(window_means, aes(x = predictability, y = mean_boundary_count, group = window, color = window)) +
  geom_line(linewidth = 1.3) + geom_point(size = 3.5) +
  geom_errorbar(aes(ymin = mean_boundary_count - se, ymax = mean_boundary_count + se), width = 0.08, linewidth = 0.7) +
  scale_color_manual(values = c("Pre-critical" = "#1b9e77", "Critical" = "#d95f02", "Post-critical" = "#7570b3")) +
  labs(x = NULL, y = "Mean boundary count", color = "Video window", title = "Boundary Counts Across Predictability Conditions") +
  theme_minimal(base_size = 14)

Interval Analysis: 1 Regression

# Create counts for each video window
window_counts <- clean_data %>% filter(trial_kind == "segmentation_video") %>%
  select(run_id, stimulus_name, predictability, boundary_times_sec) %>%
  left_join(critical_windows, by = "stimulus_name") %>% left_join(video_duration, by = "stimulus_name") %>% rowwise() %>%
  mutate(boundary_times = list(jsonlite::fromJSON(boundary_times_sec)),
    pre_count = sum(boundary_times >= 0 & boundary_times < critical_start),
    critical_count = sum(boundary_times >= critical_start & boundary_times <= critical_end),
    post_count = sum(boundary_times > critical_end & boundary_times <= video_duration_sec) ) %>%
  ungroup()

# Convert to long format: one row per trial × interval
window_counts_long <- window_counts %>%
  select(run_id, stimulus_name, predictability, pre_count, critical_count, post_count) %>%
  pivot_longer(cols = c(pre_count, critical_count, post_count), names_to = "Interval", values_to = "boundary_count") %>%
  mutate(
    Interval = recode(Interval, pre_count = "Pre-critical", critical_count = "Critical", post_count = "Post-critical"),
    Interval = factor(Interval, levels = c("Pre-critical", "Critical", "Post-critical")),
    predictability = factor(predictability, levels = c("Predictable", "Unpredictable")),
    run_id = factor(run_id), stimulus_name = factor(stimulus_name))

# One mixed-effects model with Interval and predictability as fixed effects
model_interval_int <- lmer(boundary_count ~ predictability * Interval + (1 | run_id) + (1 | stimulus_name), data = window_counts_long)
model_interval_add <- lmer(boundary_count ~ predictability + Interval + (1 | run_id) + (1 | stimulus_name), data = window_counts_long)

summary(model_interval_int)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: boundary_count ~ predictability * Interval + (1 | run_id) + (1 |  
##     stimulus_name)
##    Data: window_counts_long
## 
## REML criterion at convergence: 6723.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8209 -0.6609 -0.0937  0.5387  5.4577 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.09109  0.3018  
##  run_id        (Intercept) 0.31593  0.5621  
##  Residual                  0.85061  0.9223  
## Number of obs: 2460, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                                                     Estimate Std. Error
## (Intercept)                                          1.88539    0.16635
## predictabilityUnpredictable                          0.01299    0.06443
## IntervalCritical                                    -1.22628    0.06434
## IntervalPost-critical                                0.12895    0.06434
## predictabilityUnpredictable:IntervalCritical         0.03557    0.09110
## predictabilityUnpredictable:IntervalPost-critical   -0.07027    0.09110
##                                                           df t value Pr(>|t|)
## (Intercept)                                         18.78034  11.334 7.73e-10
## predictabilityUnpredictable                       2412.07880   0.202   0.8402
## IntervalCritical                                  2411.94645 -19.060  < 2e-16
## IntervalPost-critical                             2411.94645   2.004   0.0451
## predictabilityUnpredictable:IntervalCritical      2411.94645   0.390   0.6962
## predictabilityUnpredictable:IntervalPost-critical 2411.94645  -0.771   0.4405
##                                                      
## (Intercept)                                       ***
## predictabilityUnpredictable                          
## IntervalCritical                                  ***
## IntervalPost-critical                             *  
## predictabilityUnpredictable:IntervalCritical         
## predictabilityUnpredictable:IntervalPost-critical    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) prdctU IntrvC IntrP- prU:IC
## prdctbltyUn -0.193                            
## IntrvlCrtcl -0.193  0.499                     
## IntrvlPst-c -0.193  0.499  0.500              
## prdctblU:IC  0.137 -0.707 -0.706 -0.353       
## prdctbU:IP-  0.137 -0.707 -0.353 -0.706  0.500

summary(model_interval_add)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: boundary_count ~ predictability + Interval + (1 | run_id) + (1 |  
##     stimulus_name)
##    Data: window_counts_long
## 
## REML criterion at convergence: 6719
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7897 -0.6592 -0.0948  0.5406  5.4264 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  stimulus_name (Intercept) 0.09109  0.3018  
##  run_id        (Intercept) 0.31593  0.5621  
##  Residual                  0.85039  0.9222  
## Number of obs: 2460, groups:  stimulus_name, 30; run_id, 14
## 
## Fixed effects:
##                               Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)                  1.891e+00  1.643e-01  1.786e+01  11.512 1.08e-09
## predictabilityUnpredictable  1.423e-03  3.720e-02  2.414e+03   0.038   0.9695
## IntervalCritical            -1.209e+00  4.554e-02  2.414e+03 -26.536  < 2e-16
## IntervalPost-critical        9.390e-02  4.554e-02  2.414e+03   2.062   0.0393
##                                
## (Intercept)                 ***
## predictabilityUnpredictable    
## IntervalCritical            ***
## IntervalPost-critical       *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) prdctU IntrvC
## prdctbltyUn -0.113              
## IntrvlCrtcl -0.139  0.000       
## IntrvlPst-c -0.139  0.000  0.500

model_interval_add_ml <- lmer(boundary_count ~ predictability + Interval + (1 | run_id) + (1 | stimulus_name), data = window_counts_long, REML = FALSE)

model_interval_int_ml <- lmer(boundary_count ~ predictability * Interval + (1 | run_id) + (1 | stimulus_name), data = window_counts_long, REML = FALSE)

anova(model_interval_add_ml, model_interval_int_ml)

## Data: window_counts_long
## Models:
## model_interval_add_ml: boundary_count ~ predictability + Interval + (1 | run_id) + (1 | stimulus_name)
## model_interval_int_ml: boundary_count ~ predictability * Interval + (1 | run_id) + (1 | stimulus_name)
##                       npar    AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)
## model_interval_add_ml    7 6717.4 6758.1 -3351.7    6703.4                     
## model_interval_int_ml    9 6720.0 6772.3 -3351.0    6702.0 1.4008  2     0.4964

pred_Int_add <- ggpredict(model_interval_add, terms = c("predictability", "Interval"))

ggplot(pred_Int_add, aes(x = x, y = predicted, color = group, group = group)) +
  geom_point(size = 3) +
  geom_line(linewidth = 1.2) + geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = .08) +
  labs(x = "Predictability", y = "Predicted boundary count", color = "Interval", title = "Predicted Boundary Count by Predictability and Interval") +
  theme_minimal(base_size = 14)

emm <- emmeans(model_interval_int, ~ predictability * Interval)

## Cannot use mode = "kenward-roger" because *pbkrtest* package is not installed

emm_df <- as.data.frame(emm)

ggplot(emm_df, aes(x = predictability, y = emmean, color = Interval, group = Interval)) +
  geom_point(size = 3) +
  geom_line(linewidth = 1.2) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.08) +
  labs(x = "Predictability", y = "Predicted boundary count", color = "Interval", title = "Predicted Boundary Count by Predictability x Interval") +
  theme_minimal(base_size = 14)

SegmentationA2B1P1

Oliver Gu

2026-06-29

Exp1: ESegmentation Pilot Analysis

——————————-Exclude Participant——————————-

————————————-Clean Data————————————-

———————————Descriptive Data———————————

Participant level

Average NoB

—————————–Mixed Effect Regression—————————–

MEM for the effect of Predictability on NoB

——————-Analyzsis on Pre/Critical/Post——————-

Interval Analysis: 3 Seperate Regression

Interval Analysis: 1 Regression