Data Loading and Preparation

data <- read.csv("combined_INA_experiment_1.csv")
cat("Number of unique subjects:", length(unique(data$Subject)), "\n")
## Number of unique subjects: 312
subjects_over_1 <- unique(data$Subject[data$Trial_num > 1])
print("Subjects with trial numbers > 1:")
## [1] "Subjects with trial numbers > 1:"
print(subjects_over_1)
## [1] "SAX_MAVEW_1021" "SAX_MAVEW_2004" "SAX_MAVEW_2052" "SAX_MAVEW_2094"
data$Subject <- as.factor(data$Subject)
data$Texture <- as.factor(data$Texture)
data$Direction <- as.factor(data$Direction)
data$Gender <- as.factor(data$Gender)
data$ExcludeTrial <- as.logical(data$ExcludeTrial)

# Locomotor_group -1=pre-walking, 0=crawling, 1=walking
# Crawling_group: Has crawling experience (crawlers + walkers) vs. pre-walking
data$Crawling_group <- ifelse(data$Locomotor_group >= 0, 1, -1)
# Walking_group: Has walking experience (walkers only) vs. others  
data$Walking_group <- ifelse(data$Locomotor_group == 1, 1, -1)

# standardize age (mean zero, unit variance)
data$Age_standardized <- scale(data$Age)[,1]

# rescale trial number: only for subjects with >16 trials (trial numbers > 1)
data$Trial_num_rescaled <- data$Trial_num  # Start with original values
subjects_to_rescale <- c("SAX_MAVEW_2004", "SAX_MAVEW_2052", "SAX_MAVEW_2094", "SAX_MAVEW_1021")

for(subject in subjects_to_rescale) {
  subject_rows <- data$Subject == subject
  if(any(subject_rows)) {
    subject_trials <- data$Trial_num[subject_rows]
    data$Trial_num_rescaled[subject_rows] <- (subject_trials - min(subject_trials)) / 
                                            (max(subject_trials) - min(subject_trials))
  }
}

# days since walking onset, rescaled 0-1
# for subjects without walking experience, set to 0
data$Nav_experience <- ifelse(is.na(data$WalkExp), 0, data$WalkExp)
data$Nav_experience_rescaled <- data$Nav_experience / max(data$Nav_experience, na.rm = TRUE)

# door position (left vs right)
data$Door_side <- ifelse(data$Direction == "R", 1, -1)  # Door on right=1, left=-1

# if Texture_side = 1: High SF texture is on door side -> propLookHighSF = propLookDoor
# if Texture_side = -1: High SF texture is on painting side -> propLookHighSF = 1 - propLookDoor
data$propLookHighSF <- ifelse(data$Texture_side == 1, 
                              data$propLookDoor, 
                              1 - data$propLookDoor)

data_clean <- data[!data$ExcludeTrial, ]
cat("After excluding trials:", nrow(data_clean), "observations from", 
    length(unique(data_clean$Subject)), "subjects\n")
## After excluding trials: 4281 observations from 312 subjects
summary_vars <- data_clean %>%
  select(propLookDoor, Age, Age_standardized, Trial_num_rescaled, 
         Nav_experience_rescaled, Crawling_group, Walking_group, Texture_side) %>%
  summary()

print(summary_vars)
##   propLookDoor         Age      Age_standardized   Trial_num_rescaled
##  Min.   :0.0000   Min.   :126   Min.   :-1.73208   Min.   :0.0000    
##  1st Qu.:0.3191   1st Qu.:217   1st Qu.:-0.92423   1st Qu.:0.2000    
##  Median :0.5262   Median :289   Median :-0.28505   Median :0.4667    
##  Mean   :0.5202   Mean   :320   Mean   :-0.01023   Mean   :0.4908    
##  3rd Qu.:0.7333   3rd Qu.:397   3rd Qu.: 0.67373   3rd Qu.:0.7333    
##  Max.   :1.0000   Max.   :581   Max.   : 2.30719   Max.   :1.0000    
##  Nav_experience_rescaled Crawling_group    Walking_group    
##  Min.   :0.00000         Min.   :-1.0000   Min.   :-1.0000  
##  1st Qu.:0.00000         1st Qu.:-1.0000   1st Qu.:-1.0000  
##  Median :0.00000         Median : 1.0000   Median :-1.0000  
##  Mean   :0.05114         Mean   : 0.4403   Mean   :-0.4071  
##  3rd Qu.:0.00000         3rd Qu.: 1.0000   3rd Qu.: 1.0000  
##  Max.   :1.00000         Max.   : 1.0000   Max.   : 1.0000  
##   Texture_side      
##  Min.   :-1.000000  
##  1st Qu.:-1.000000  
##  Median :-1.000000  
##  Mean   :-0.006307  
##  3rd Qu.: 1.000000  
##  Max.   : 1.000000

1. Full sample preregistered analyses

1.1 Additional Analysis 1 from MAVEW preregistration: proportion looking to door

# Prop. Looking to Door ~ Crawling_group + Walking_group + Age + Texture_side + Trial_num + (1|Subject)
model1 <- lmer(propLookDoor ~ Crawling_group + Walking_group + Age_standardized + 
               Texture_side + Trial_num_rescaled + (1|Subject), 
               data = data_clean)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookDoor ~ Crawling_group + Walking_group + Age_standardized +  
##     Texture_side + Trial_num_rescaled + (1 | Subject)
##    Data: data_clean
## 
## REML criterion at convergence: 408.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.57465 -0.70048 -0.00145  0.73547  2.35598 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 0.00000  0.0000  
##  Residual             0.06373  0.2525  
## Number of obs: 4281, groups:  Subject, 312
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)         5.372e-01  8.028e-03  4.275e+03  66.914   <2e-16 ***
## Crawling_group     -4.702e-03  5.224e-03  4.275e+03  -0.900   0.3682    
## Walking_group       8.943e-03  6.657e-03  4.275e+03   1.343   0.1792    
## Age_standardized    1.289e-02  6.731e-03  4.275e+03   1.914   0.0556 .  
## Texture_side        9.020e-02  3.862e-03  4.275e+03  23.360   <2e-16 ***
## Trial_num_rescaled -2.146e-02  1.257e-02  4.275e+03  -1.707   0.0879 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Crwln_ Wlkng_ Ag_stn Txtr_s
## Crawlng_grp -0.264                            
## Walking_grp  0.312  0.062                     
## Ag_stndrdzd -0.115 -0.437 -0.720              
## Texture_sid  0.034  0.001  0.005 -0.003       
## Trl_nm_rscl -0.766 -0.007  0.002  0.007 -0.039
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
model1_results <- tidy(model1, conf.int = TRUE, effects = "fixed")
print(model1_results)
## # A tibble: 6 × 9
##   effect term    estimate std.error statistic    df   p.value conf.low conf.high
##   <chr>  <chr>      <dbl>     <dbl>     <dbl> <dbl>     <dbl>    <dbl>     <dbl>
## 1 fixed  (Inter…  0.537     0.00803    66.9   4275. 0          5.21e-1   0.553  
## 2 fixed  Crawli… -0.00470   0.00522    -0.900 4275. 3.68e-  1 -1.49e-2   0.00554
## 3 fixed  Walkin…  0.00894   0.00666     1.34  4275. 1.79e-  1 -4.11e-3   0.0220 
## 4 fixed  Age_st…  0.0129    0.00673     1.91  4275. 5.56e-  2 -3.10e-4   0.0261 
## 5 fixed  Textur…  0.0902    0.00386    23.4   4275. 1.10e-113  8.26e-2   0.0978 
## 6 fixed  Trial_… -0.0215    0.0126     -1.71  4275. 8.79e-  2 -4.61e-2   0.00319
model1_std <- lmer(scale(propLookDoor) ~ scale(Crawling_group) + scale(Walking_group) + 
                   scale(Age_standardized) + scale(Texture_side) + 
                   scale(Trial_num_rescaled) + (1|Subject), 
                   data = data_clean)

model1_std_results <- tidy(model1_std, effects = "fixed")
# print("Standardized coefficients:")
# print(model1_std_results)
coefs <- model1_results

# Does the walking group differ from the average of pre-walking + crawling groups?
walking_effect <- coefs[coefs$term == "Walking_group", ]
walking_p_one_tailed <- walking_effect$p.value / 2
cat("Walking group effect: β =", round(walking_effect$estimate, 4), 
    ", one-tailed p =", round(walking_p_one_tailed, 4), "\n")
## Walking group effect: β = 0.0089 , one-tailed p = 0.0896
# Does the crawling group differ from the average of pre-crawling group?
age_effect <- coefs[coefs$term == "Age_standardized", ]
age_p_one_tailed <- age_effect$p.value / 2
cat("Age effect: β =", round(age_effect$estimate, 4), 
    ", one-tailed p =", round(age_p_one_tailed, 4), "\n")
## Age effect: β = 0.0129 , one-tailed p = 0.0278
# Texture side effect (expecting positive when high SF on door side)
texture_effect <- coefs[coefs$term == "Texture_side", ]
texture_p_one_tailed <- texture_effect$p.value / 2
cat("Texture side effect: β =", round(texture_effect$estimate, 4), 
    ", one-tailed p =", round(texture_p_one_tailed, 4), "\n")
## Texture side effect: β = 0.0902 , one-tailed p = 0
# Crawling group effect (expecting no effect)
crawling_effect <- coefs[coefs$term == "Crawling_group", ]
crawling_p_one_tailed <- crawling_effect$p.value / 2
cat("Crawling group effect: β =", round(crawling_effect$estimate, 4), 
    ", one-tailed p =", round(crawling_effect$p.value, 4), "\n")
## Crawling group effect: β = -0.0047 , one-tailed p = 0.3682

1.2 Additional Analysis 2 from MAVEW preregistration: proportion looking to high SF

# Prop. Looking to High SF ~ Crawling_group + Walking_group + Age + Door_side + Trial_num + (1|Subject)
model2 <- lmer(propLookHighSF ~ Crawling_group + Walking_group + Age_standardized + 
               Door_side + Trial_num_rescaled + (1|Subject), 
               data = data_clean)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookHighSF ~ Crawling_group + Walking_group + Age_standardized +  
##     Door_side + Trial_num_rescaled + (1 | Subject)
##    Data: data_clean
## 
## REML criterion at convergence: 448.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.43257 -0.65406  0.08865  0.75486  1.80041 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  Subject  (Intercept) 0.0009848 0.03138 
##  Residual             0.0634417 0.25188 
## Number of obs: 4281, groups:  Subject, 312
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)         5.956e-01  8.354e-03  1.296e+03  71.297   <2e-16 ***
## Crawling_group      6.294e-03  5.791e-03  2.630e+02   1.087    0.278    
## Walking_group      -1.641e-03  7.375e-03  2.647e+02  -0.222    0.824    
## Age_standardized   -7.524e-03  7.441e-03  2.763e+02  -1.011    0.313    
## Door_side          -6.144e-03  3.851e-03  3.986e+03  -1.596    0.111    
## Trial_num_rescaled -1.912e-02  1.254e-02  4.027e+03  -1.524    0.128    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Crwln_ Wlkng_ Ag_stn Dor_sd
## Crawlng_grp -0.277                            
## Walking_grp  0.329  0.064                     
## Ag_stndrdzd -0.121 -0.441 -0.722              
## Door_side   -0.011  0.003  0.000 -0.002       
## Trl_nm_rscl -0.733 -0.007  0.002  0.006  0.010
model2_results <- tidy(model2, conf.int = TRUE, effects = "fixed")
# print(model2_results)
coefs2 <- model2_results

walking_effect2 <- coefs2[coefs2$term == "Walking_group", ]
cat("Walking group effect on High SF looking: β =", round(walking_effect2$estimate, 4), 
    ", two-tailed p =", round(walking_effect2$p.value, 4), "\n")
## Walking group effect on High SF looking: β = -0.0016 , two-tailed p = 0.8241
crawling_effect2 <- coefs2[coefs2$term == "Crawling_group", ]
cat("Crawling group effect on High SF looking: β =", round(crawling_effect2$estimate, 4), 
    ", two-tailed p =", round(crawling_effect2$p.value, 4), "\n")
## Crawling group effect on High SF looking: β = 0.0063 , two-tailed p = 0.2781

1.3 Visualizations

p1 <- ggplot(data_clean, aes(x = Age, y = propLookDoor, color = factor(Locomotor_group))) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", se = TRUE) +
  scale_color_manual(name = "Locomotor Group", 
                     labels = c("-1" = "Pre-locomoting", "0" = "Crawling", "1" = "Walking"),
                     values = c("-1" = "#0173B2",   # Blue
                                "0" = "#DE8F05",    # Orange  
                                "1" = "#CC78BC")) + # Pink/Purple
  labs(title = "Proportion Looking to Door by Age and Locomotor Group",
       x = "Age (days)", y = "Proportion Looking to Door") +
  theme_minimal()
print(p1)

p2 <- ggplot(data_clean, aes(x = factor(Texture_side), y = propLookDoor)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2, width = 0.2) +
  scale_x_discrete(labels = c("-1" = "High SF on Painting Side", "1" = "High SF on Door Side")) +
  labs(title = "Proportion Looking to Door by Texture Side",
       x = "Texture Side Condition", y = "Proportion Looking to Door") +
  theme_minimal()
print(p2)

p3 <- ggplot(data_clean, aes(x = Trial_num_rescaled, y = propLookDoor)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(title = "Proportion Looking to Door Across Trials",
       x = "Trial Number (rescaled 0-1)", y = "Proportion Looking to Door") +
  theme_minimal()
print(p3)

2. Not preregistered analyses

2.1 Preregistered 1.1 without age

model_no_age <- lmer(propLookDoor ~ Crawling_group + Walking_group + 
                     Texture_side + Trial_num_rescaled + (1|Subject), 
                     data = data_clean)

summary(model_no_age)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookDoor ~ Crawling_group + Walking_group + Texture_side +  
##     Trial_num_rescaled + (1 | Subject)
##    Data: data_clean
## 
## REML criterion at convergence: 404.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.55621 -0.70240  0.00051  0.73371  2.34173 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 0.00000  0.0000  
##  Residual             0.06377  0.2525  
## Number of obs: 4281, groups:  Subject, 312
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)         5.389e-01  7.978e-03  4.276e+03  67.558  < 2e-16 ***
## Crawling_group     -3.323e-04  4.701e-03  4.276e+03  -0.071   0.9436    
## Walking_group       1.812e-02  4.621e-03  4.276e+03   3.921 8.95e-05 ***
## Texture_side        9.023e-02  3.863e-03  4.276e+03  23.358  < 2e-16 ***
## Trial_num_rescaled -2.163e-02  1.258e-02  4.276e+03  -1.720   0.0856 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Crwln_ Wlkng_ Txtr_s
## Crawlng_grp -0.352                     
## Walking_grp  0.333 -0.405              
## Texture_sid  0.034  0.000  0.004       
## Trl_nm_rscl -0.770 -0.004  0.010 -0.039
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
no_age_results <- tidy(model_no_age, conf.int = TRUE, effects = "fixed")
# print("Results without age predictor:")
# print(no_age_results)

walking_effect_no_age <- no_age_results[no_age_results$term == "Walking_group", ]
walking_p_one_tailed <- walking_effect_no_age$p.value / 2

cat("With age included: β = 0.0089, one-tailed p = 0.0896\n")
## With age included: β = 0.0089, one-tailed p = 0.0896
cat("Without age:       β =", round(walking_effect_no_age$estimate, 4), 
    ", one-tailed p =", round(walking_p_one_tailed, 4), "\n")
## Without age:       β = 0.0181 , one-tailed p = 0

2.2 Preregistered 1.1 with walking no crawling

model1_walking_only <- lmer(propLookDoor ~ Walking_group + Age_standardized + 
               Texture_side + Trial_num_rescaled + (1|Subject), 
               data = data_clean)
summary(model1_walking_only)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookDoor ~ Walking_group + Age_standardized + Texture_side +  
##     Trial_num_rescaled + (1 | Subject)
##    Data: data_clean
## 
## REML criterion at convergence: 400.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.56642 -0.70131  0.00003  0.73984  2.36841 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 0.00000  0.0000  
##  Residual             0.06373  0.2524  
## Number of obs: 4281, groups:  Subject, 312
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)         5.353e-01  7.742e-03  4.276e+03  69.135   <2e-16 ***
## Walking_group       9.314e-03  6.644e-03  4.276e+03   1.402   0.1610    
## Age_standardized    1.024e-02  6.054e-03  4.276e+03   1.691   0.0909 .  
## Texture_side        9.021e-02  3.861e-03  4.276e+03  23.361   <2e-16 ***
## Trial_num_rescaled -2.154e-02  1.257e-02  4.276e+03  -1.713   0.0867 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Wlkng_ Ag_stn Txtr_s
## Walking_grp  0.341                     
## Ag_stndrdzd -0.265 -0.772              
## Texture_sid  0.036  0.005 -0.003       
## Trl_nm_rscl -0.796  0.002  0.004 -0.039
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

2.3 Subject-level locomotor group effect

subject_data <- data_clean %>%
  group_by(Subject) %>%
  summarise(
    propLookDoor_mean = mean(propLookDoor, na.rm = TRUE),
    Age_standardized = first(Age_standardized),  # Same for all trials
    Walking_group = first(Walking_group),
    Crawling_group = first(Crawling_group),
    Locomotor_group = first(Locomotor_group),
    n_trials = n(),
    .groups = 'drop'
  )

# no random effects since one obs per subject
model_subject <- lm(propLookDoor_mean ~ Walking_group + Crawling_group + Age_standardized, 
                    data = subject_data)

summary(model_subject)
## 
## Call:
## lm(formula = propLookDoor_mean ~ Walking_group + Crawling_group + 
##     Age_standardized, data = subject_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34582 -0.03365  0.00363  0.03194  0.29075 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.523940   0.004542 115.347   <2e-16 ***
## Walking_group     0.007948   0.006293   1.263   0.2075    
## Crawling_group   -0.003351   0.004913  -0.682   0.4958    
## Age_standardized  0.014487   0.006217   2.330   0.0205 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06239 on 308 degrees of freedom
## Multiple R-squared:  0.09641,    Adjusted R-squared:  0.0876 
## F-statistic: 10.95 on 3 and 308 DF,  p-value: 7.445e-07
# subject_results <- tidy(model_subject, conf.int = TRUE)
# print(subject_results)

2.4 Modeling navigation experience as continuous

2.4.1 Walking experience as continuous variable only

# walking experience as continuous variable
walking_data <- data_clean[data_clean$Locomotor_group == 1, ]
nav_model <- lmer(propLookDoor ~ Nav_experience_rescaled + Age_standardized + 
                    Texture_side + Trial_num_rescaled + (1|Subject), 
                    data = walking_data)
summary(nav_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## propLookDoor ~ Nav_experience_rescaled + Age_standardized + Texture_side +  
##     Trial_num_rescaled + (1 | Subject)
##    Data: walking_data
## 
## REML criterion at convergence: 40.5
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.64641 -0.65035 -0.00337  0.70870  2.29975 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 0.00000  0.000   
##  Residual             0.05905  0.243   
## Number of obs: 1269, groups:  Subject, 100
## 
## Fixed effects:
##                           Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)              5.548e-01  1.853e-02  1.264e+03  29.945   <2e-16 ***
## Nav_experience_rescaled -1.367e-02  2.857e-02  1.264e+03  -0.479    0.632    
## Age_standardized         8.468e-03  1.042e-02  1.264e+03   0.813    0.416    
## Texture_side             8.272e-02  6.830e-03  1.264e+03  12.112   <2e-16 ***
## Trial_num_rescaled      -3.364e-02  2.222e-02  1.264e+03  -1.514    0.130    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Nv_xp_ Ag_stn Txtr_s
## Nv_xprnc_rs -0.280                     
## Ag_stndrdzd -0.671  0.012              
## Texture_sid  0.032  0.007 -0.003       
## Trl_nm_rscl -0.590  0.009  0.005 -0.047
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
ggplot(walking_data, aes(x = Nav_experience, y = propLookDoor)) +
    geom_point(alpha = 0.3) +
    geom_smooth(method = "lm", se = TRUE) +
    labs(title = "Door Looking by Navigation Experience (Walking Infants Only)",
         x = "Walking Experience (days)", y = "Proportion Looking to Door") +
    theme_minimal()

2.4.2 Locomoting experience as continuous variable

# locomoting exp as continuous variable
data$CrawlExp_continuous <- ifelse(data$Locomotor_group == -1, 0,  # Pre-walkers = 0
                                  ifelse(is.na(data$CrawlExp), 0,  # Missing = 0  
                                         data$CrawlExp))           # Actual days

data$CrawlExp_rescaled <- data$CrawlExp_continuous / max(data$CrawlExp_continuous, na.rm = TRUE)
data_clean <- data[!data$ExcludeTrial, ]
cat("Full sample size:", nrow(data_clean), "trials from", 
    length(unique(data_clean$Subject)), "subjects\n")
## Full sample size: 4281 trials from 312 subjects
cat("Crawling experience distribution:\n")
## Crawling experience distribution:
print(summary(data_clean$CrawlExp_continuous))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   27.00   63.89  111.00  370.00
crawl_exp_full_model <- lmer(propLookDoor ~ CrawlExp_rescaled + Age_standardized + 
                            Texture_side + Trial_num_rescaled + (1|Subject), 
                            data = data_clean)

summary(crawl_exp_full_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookDoor ~ CrawlExp_rescaled + Age_standardized + Texture_side +  
##     Trial_num_rescaled + (1 | Subject)
##    Data: data_clean
## 
## REML criterion at convergence: 400.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.56388 -0.69587 -0.00092  0.74069  2.38431 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 0.00000  0.0000  
##  Residual             0.06376  0.2525  
## Number of obs: 4281, groups:  Subject, 312
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)         5.314e-01  8.046e-03  4.276e+03  66.047   <2e-16 ***
## CrawlExp_rescaled   1.088e-03  1.954e-02  4.276e+03   0.056   0.9556    
## Age_standardized    1.668e-02  4.285e-03  4.276e+03   3.894   0.0001 ***
## Texture_side        9.018e-02  3.862e-03  4.276e+03  23.350   <2e-16 ***
## Trial_num_rescaled -2.158e-02  1.257e-02  4.276e+03  -1.716   0.0863 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) CrwlE_ Ag_stn Txtr_s
## CrwlExp_rsc -0.426                     
## Ag_stndrdzd  0.185 -0.439              
## Texture_sid  0.033  0.001  0.001       
## Trl_nm_rscl -0.769  0.005  0.006 -0.039
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
crawl_full_results <- tidy(crawl_exp_full_model, conf.int = TRUE, effects = "fixed")
# print("Crawling experience results (full sample):")
# print(crawl_full_results)

crawl_full_effect <- crawl_full_results[crawl_full_results$term == "CrawlExp_rescaled", ]
cat("Crawling experience effect: β =", round(crawl_full_effect$estimate, 4), 
    ", p =", round(crawl_full_effect$p.value, 4), "\n")
## Crawling experience effect: β = 0.0011 , p = 0.9556
ggplot(data_clean, aes(x = CrawlExp_continuous, y = propLookDoor)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(title = "Door Looking by Crawling Experience (All Babies)",
       x = "Crawling Experience (days)", y = "Proportion Looking to Door") +
  theme_minimal()

2.5 Does door looking preference still exist?

2.5.1 Walking group

walking_only <- data_clean[data_clean$Locomotor_group == 1, ]

cat("Walking group sample size:", nrow(walking_only), "trials from", 
    length(unique(walking_only$Subject)), "subjects\n")
## Walking group sample size: 1269 trials from 100 subjects
walking_chance_test <- t.test(walking_only$propLookDoor, mu = 0.5)
print(walking_chance_test)
## 
##  One Sample t-test
## 
## data:  walking_only$propLookDoor
## t = 6.2509, df = 1268, p-value = 5.565e-10
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.5308817 0.5591328
## sample estimates:
## mean of x 
## 0.5450073
cat("\nWalking group door looking:\n")
## 
## Walking group door looking:
cat("Mean proportion:", round(mean(walking_only$propLookDoor), 4), "\n")
## Mean proportion: 0.545
cat("Standard deviation:", round(sd(walking_only$propLookDoor), 4), "\n")
## Standard deviation: 0.2565
cat("95% CI:", round(walking_chance_test$conf.int[1], 4), "to", 
    round(walking_chance_test$conf.int[2], 4), "\n")
## 95% CI: 0.5309 to 0.5591
cohens_d <- (mean(walking_only$propLookDoor) - 0.5) / sd(walking_only$propLookDoor)
cat("Effect size (Cohen's d):", round(cohens_d, 4), "\n")
## Effect size (Cohen's d): 0.1755
# subject level for completeness
walking_subject_means <- walking_only %>%
  group_by(Subject) %>%
  summarise(propLookDoor_mean = mean(propLookDoor), .groups = 'drop')

subject_chance_test <- t.test(walking_subject_means$propLookDoor_mean, mu = 0.5)
cat("\nSubject-level test against chance:\n")
## 
## Subject-level test against chance:
cat("Mean:", round(mean(walking_subject_means$propLookDoor_mean), 4), 
    ", p =", round(subject_chance_test$p.value, 4), "\n")
## Mean: 0.5467 , p = 0

2.5.2 Crawling group

crawling_only <- data_clean[data_clean$Locomotor_group == 0, ]

cat("Crawling group sample size:", nrow(crawling_only), "trials from", 
    length(unique(crawling_only$Subject)), "subjects\n")
## Crawling group sample size: 1814 trials from 117 subjects
crawling_chance_test <- t.test(crawling_only$propLookDoor, mu = 0.5)
print(crawling_chance_test)
## 
##  One Sample t-test
## 
## data:  crawling_only$propLookDoor
## t = 1.529, df = 1813, p-value = 0.1265
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.4973085 0.5217288
## sample estimates:
## mean of x 
## 0.5095187
cat("\nCrawling group door looking:\n")
## 
## Crawling group door looking:
cat("Mean proportion:", round(mean(crawling_only$propLookDoor), 4), "\n")
## Mean proportion: 0.5095
cat("Standard deviation:", round(sd(crawling_only$propLookDoor), 4), "\n")
## Standard deviation: 0.2652
cat("95% CI:", round(crawling_chance_test$conf.int[1], 4), "to", 
    round(crawling_chance_test$conf.int[2], 4), "\n")
## 95% CI: 0.4973 to 0.5217
cohens_d <- (mean(crawling_only$propLookDoor) - 0.5) / sd(crawling_only$propLookDoor)
cat("Effect size (Cohen's d):", round(cohens_d, 4), "\n")
## Effect size (Cohen's d): 0.0359
crawling_subject_means <- crawling_only %>%
  group_by(Subject) %>%
  summarise(propLookDoor_mean = mean(propLookDoor), .groups = 'drop')

subject_chance_test <- t.test(crawling_subject_means$propLookDoor_mean, mu = 0.5)
cat("\nSubject-level test against chance:\n")
## 
## Subject-level test against chance:
cat("Mean:", round(mean(crawling_subject_means$propLookDoor_mean), 4), 
    ", p =", round(subject_chance_test$p.value, 4), "\n")
## Mean: 0.5092 , p = 0.0224

2.5.3 Pre-locomoting group

prewalk_only <- data_clean[data_clean$Locomotor_group == -1, ]
cat("Pre-walking group sample size:", nrow(prewalk_only), "trials from", 
    length(unique(prewalk_only$Subject)), "subjects\n")
## Pre-walking group sample size: 1198 trials from 95 subjects
prewalk_chance_test <- t.test(prewalk_only$propLookDoor, mu = 0.5)
print(prewalk_chance_test)
## 
##  One Sample t-test
## 
## data:  prewalk_only$propLookDoor
## t = 1.2482, df = 1197, p-value = 0.2122
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.4941409 0.5263520
## sample estimates:
## mean of x 
## 0.5102464
cat("Pre-walking door looking mean:", round(mean(prewalk_only$propLookDoor), 4), "\n")
## Pre-walking door looking mean: 0.5102
cohens_d_prewalk <- (mean(prewalk_only$propLookDoor) - 0.5) / sd(prewalk_only$propLookDoor)
cat("Effect size (Cohen's d):", round(cohens_d_prewalk, 4), "\n")
## Effect size (Cohen's d): 0.0361

3. Data quality checks

3.1 Individual differences? Split half prediction

# library(dplyr)
# 
# comprehensive_split_half <- function(data, max_combinations_per_subject = 1000) {
#   subject_trial_counts <- data %>%
#     group_by(Subject) %>%
#     summarise(n_trials = n(), .groups = 'drop') %>%
#     filter(n_trials >= 4)
#   
#   cat("Subjects with not fewer than 4 trials: ", nrow(subject_trial_counts))
#   
#   # get all split combinations for one subject
#   get_split_combinations <- function(n_trials, max_combinations) {
#     half_size <- floor(n_trials / 2)
#     
#     # all possible combinations
#     all_combinations <- combn(1:n_trials, half_size, simplify = FALSE)
#     
#     # sample if too many combinations
#     if(length(all_combinations) > max_combinations) {
#       sampled_indices <- sample(1:length(all_combinations), max_combinations)
#       all_combinations <- all_combinations[sampled_indices]
#     }
#     
#     return(all_combinations)
#   }
#   
#   subject_splits <- list()
#   for(i in 1:nrow(subject_trial_counts)) {
#     subject_id <- subject_trial_counts$Subject[i]
#     n_trials <- subject_trial_counts$n_trials[i]
#     
#     subject_splits[[as.character(subject_id)]] <- get_split_combinations(n_trials, max_combinations_per_subject)
#   }
#   
#   # total number of split configurations to test
#   n_configurations <- prod(sapply(subject_splits, length))
#   max_configs_to_test <- 10000  # Limit for computational feasibility
#   
#   if(n_configurations > max_configs_to_test) {
#     cat("Sampling ", max_configs_to_test, "\n")
#     n_to_sample <- max_configs_to_test
#   } else {
#     cat("Testing", n_configurations, "total split configurations\n")
#     n_to_sample <- n_configurations
#   }
#   
#   correlations <- numeric(n_to_sample)
#   
#   for(config in 1:n_to_sample) {
#     half1_means <- numeric(length(subject_splits))
#     half2_means <- numeric(length(subject_splits))
#     
#     # for each subject in this configuration
#     for(j in 1:length(subject_splits)) {
#       subject_id <- names(subject_splits)[j]
#       subject_data <- data[data$Subject == subject_id, ]
#       
#       # randomly sample one split for this subject in this configuration
#       split_options <- subject_splits[[j]]
#       chosen_split <- sample(split_options, 1)[[1]]
#       
#       # calculate half means
#       half1_indices <- chosen_split
#       half2_indices <- setdiff(1:nrow(subject_data), half1_indices)
#       
#       half1_means[j] <- mean(subject_data$propLookDoor[half1_indices])
#       half2_means[j] <- mean(subject_data$propLookDoor[half2_indices])
#     }
#     
#     # calculate correlation for this configuration
#     correlations[config] <- cor(half1_means, half2_means, use = "complete.obs")
#     
#     if(config %% 1000 == 0) cat("Completed", config, "configurations\n")
#   }
#   
#   return(correlations)
# }
# 
# real_correlations <- comprehensive_split_half(data_clean)
# 
# mean_real_correlation <- mean(real_correlations, na.rm = TRUE)
# cat("Mean correlation across all split configurations:", round(mean_real_correlation, 3), "\n")
# cat("Range:", round(min(real_correlations, na.rm = TRUE), 3), "to", 
#     round(max(real_correlations, na.rm = TRUE), 3), "\n")
# 
# cat("\nGenerating null distribution...\n")
# n_null_iterations <- 1000
# null_correlations <- numeric(n_null_iterations)
# 
# subject_data_for_null <- data_clean %>%
#   group_by(Subject) %>%
#   summarise(
#     n_trials = n(),
#     trials = list(propLookDoor),
#     .groups = 'drop'
#   ) %>%
#   filter(n_trials >= 4)
# 
# for(i in 1:n_null_iterations) {
#   half1_means <- numeric(nrow(subject_data_for_null))
#   half2_means <- numeric(nrow(subject_data_for_null))
#   
#   for(j in 1:nrow(subject_data_for_null)) {
#     trials <- subject_data_for_null$trials[[j]]
#     n_trials <- length(trials)
#     half_size <- floor(n_trials / 2)
#     
#     # random assignment
#     shuffled_indices <- sample(1:n_trials)
#     half1_indices <- shuffled_indices[1:half_size]
#     half2_indices <- shuffled_indices[(half_size + 1):n_trials]
#     
#     half1_means[j] <- mean(trials[half1_indices])
#     half2_means[j] <- mean(trials[half2_indices])
#   }
#   
#   null_correlations[i] <- cor(half1_means, half2_means)
# }
# 
# cat("Mean null correlation:", round(mean(null_correlations), 3), "\n")
# cat("P-value (two-tailed):", 
#     mean(abs(null_correlations) >= abs(mean_real_correlation)), "\n")
# 
# par(mfrow = c(2, 1))
# 
# # distribution
# hist(real_correlations, main = "Distribution of Real Split-Half Correlations", 
#      xlab = "Correlation", breaks = 30, col = "lightblue")
# abline(v = mean_real_correlation, col = "red", lwd = 2)
# 
# hist(null_correlations, main = "Real vs. Null Correlations", 
#      xlab = "Correlation", breaks = 30, col = "lightgray", 
#      xlim = range(c(real_correlations, null_correlations)))
# abline(v = mean_real_correlation, col = "red", lwd = 2, 
#        label = paste("Real =", round(mean_real_correlation, 3)))
# abline(v = mean(null_correlations), col = "blue", lwd = 2)
# legend("topright", c("Real correlation", "Null mean"), 
#        col = c("red", "blue"), lwd = 2)
# 
# par(mfrow = c(1, 1))

3.2 Is the walking group effect stable to individual noise at all? Bootstrapping

# library(boot)
# walking_effect_boot <- function(data, indices) {
#   boot_data <- data[indices, ]
#   walking_mean <- mean(boot_data$propLookDoor[boot_data$Walking_group == 1])
#   other_mean <- mean(boot_data$propLookDoor[boot_data$Walking_group == -1])
#   return(walking_mean - other_mean)
# }
# boot_result <- boot(data_clean, walking_effect_boot, R = 1000)
# boot.ci(boot_result)