Data Loading and Preparation

data <- read.csv("combined_INA_experiment_1.csv")
cat("Number of unique subjects:", length(unique(data$Subject)), "\n")

## Number of unique subjects: 312

subjects_over_1 <- unique(data$Subject[data$Trial_num > 1])
print("Subjects with trial numbers > 1:")

## [1] "Subjects with trial numbers > 1:"

print(subjects_over_1)

## [1] "SAX_MAVEW_1021" "SAX_MAVEW_2004" "SAX_MAVEW_2052" "SAX_MAVEW_2094"

data$Subject <- as.factor(data$Subject)
data$Texture <- as.factor(data$Texture)
data$Direction <- as.factor(data$Direction)
data$Gender <- as.factor(data$Gender)
data$ExcludeTrial <- as.logical(data$ExcludeTrial)

# Locomotor_group -1=pre-walking, 0=crawling, 1=walking
# Crawling_group: Has crawling experience (crawlers + walkers) vs. pre-walking
data$Crawling_group <- ifelse(data$Locomotor_group >= 0, 1, -1)
# Walking_group: Has walking experience (walkers only) vs. others  
data$Walking_group <- ifelse(data$Locomotor_group == 1, 1, -1)

# standardize age (mean zero, unit variance)
data$Age_standardized <- scale(data$Age)[,1]

# rescale trial number: only for subjects with >16 trials (trial numbers > 1)
data$Trial_num_rescaled <- data$Trial_num  # Start with original values
subjects_to_rescale <- c("SAX_MAVEW_2004", "SAX_MAVEW_2052", "SAX_MAVEW_2094", "SAX_MAVEW_1021")

for(subject in subjects_to_rescale) {
  subject_rows <- data$Subject == subject
  if(any(subject_rows)) {
    subject_trials <- data$Trial_num[subject_rows]
    data$Trial_num_rescaled[subject_rows] <- (subject_trials - min(subject_trials)) / 
                                            (max(subject_trials) - min(subject_trials))
  }
}

# days since walking onset, rescaled 0-1
# for subjects without walking experience, set to 0
data$Nav_experience <- ifelse(is.na(data$WalkExp), 0, data$WalkExp)
data$Nav_experience_rescaled <- data$Nav_experience / max(data$Nav_experience, na.rm = TRUE)

# door position (left vs right)
data$Door_side <- ifelse(data$Direction == "R", 1, -1)  # Door on right=1, left=-1

# if Texture_side = 1: High SF texture is on door side -> propLookHighSF = propLookDoor
# if Texture_side = -1: High SF texture is on painting side -> propLookHighSF = 1 - propLookDoor
data$propLookHighSF <- ifelse(data$Texture_side == 1, 
                              data$propLookDoor, 
                              1 - data$propLookDoor)

data_clean <- data[!data$ExcludeTrial, ]
cat("After excluding trials:", nrow(data_clean), "observations from", 
    length(unique(data_clean$Subject)), "subjects\n")

## After excluding trials: 4281 observations from 312 subjects

summary_vars <- data_clean %>%
  select(propLookDoor, Age, Age_standardized, Trial_num_rescaled, 
         Nav_experience_rescaled, Crawling_group, Walking_group, Texture_side) %>%
  summary()

print(summary_vars)

##   propLookDoor         Age      Age_standardized   Trial_num_rescaled
##  Min.   :0.0000   Min.   :126   Min.   :-1.73208   Min.   :0.0000    
##  1st Qu.:0.3191   1st Qu.:217   1st Qu.:-0.92423   1st Qu.:0.2000    
##  Median :0.5262   Median :289   Median :-0.28505   Median :0.4667    
##  Mean   :0.5202   Mean   :320   Mean   :-0.01023   Mean   :0.4908    
##  3rd Qu.:0.7333   3rd Qu.:397   3rd Qu.: 0.67373   3rd Qu.:0.7333    
##  Max.   :1.0000   Max.   :581   Max.   : 2.30719   Max.   :1.0000    
##  Nav_experience_rescaled Crawling_group    Walking_group    
##  Min.   :0.00000         Min.   :-1.0000   Min.   :-1.0000  
##  1st Qu.:0.00000         1st Qu.:-1.0000   1st Qu.:-1.0000  
##  Median :0.00000         Median : 1.0000   Median :-1.0000  
##  Mean   :0.05114         Mean   : 0.4403   Mean   :-0.4071  
##  3rd Qu.:0.00000         3rd Qu.: 1.0000   3rd Qu.: 1.0000  
##  Max.   :1.00000         Max.   : 1.0000   Max.   : 1.0000  
##   Texture_side      
##  Min.   :-1.000000  
##  1st Qu.:-1.000000  
##  Median :-1.000000  
##  Mean   :-0.006307  
##  3rd Qu.: 1.000000  
##  Max.   : 1.000000

1. Full sample preregistered analyses

1.1 Additional Analysis 1 from MAVEW preregistration: proportion looking to door

# Prop. Looking to Door ~ Crawling_group + Walking_group + Age + Texture_side + Trial_num + (1|Subject)
model1 <- lmer(propLookDoor ~ Crawling_group + Walking_group + Age_standardized + 
               Texture_side + Trial_num_rescaled + (1|Subject), 
               data = data_clean)
summary(model1)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookDoor ~ Crawling_group + Walking_group + Age_standardized +  
##     Texture_side + Trial_num_rescaled + (1 | Subject)
##    Data: data_clean
## 
## REML criterion at convergence: 408.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.57465 -0.70048 -0.00145  0.73547  2.35598 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 0.00000  0.0000  
##  Residual             0.06373  0.2525  
## Number of obs: 4281, groups:  Subject, 312
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)         5.372e-01  8.028e-03  4.275e+03  66.914   <2e-16 ***
## Crawling_group     -4.702e-03  5.224e-03  4.275e+03  -0.900   0.3682    
## Walking_group       8.943e-03  6.657e-03  4.275e+03   1.343   0.1792    
## Age_standardized    1.289e-02  6.731e-03  4.275e+03   1.914   0.0556 .  
## Texture_side        9.020e-02  3.862e-03  4.275e+03  23.360   <2e-16 ***
## Trial_num_rescaled -2.146e-02  1.257e-02  4.275e+03  -1.707   0.0879 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Crwln_ Wlkng_ Ag_stn Txtr_s
## Crawlng_grp -0.264                            
## Walking_grp  0.312  0.062                     
## Ag_stndrdzd -0.115 -0.437 -0.720              
## Texture_sid  0.034  0.001  0.005 -0.003       
## Trl_nm_rscl -0.766 -0.007  0.002  0.007 -0.039
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

model1_results <- tidy(model1, conf.int = TRUE, effects = "fixed")
print(model1_results)

## # A tibble: 6 × 9
##   effect term    estimate std.error statistic    df   p.value conf.low conf.high
##   <chr>  <chr>      <dbl>     <dbl>     <dbl> <dbl>     <dbl>    <dbl>     <dbl>
## 1 fixed  (Inter…  0.537     0.00803    66.9   4275. 0          5.21e-1   0.553  
## 2 fixed  Crawli… -0.00470   0.00522    -0.900 4275. 3.68e-  1 -1.49e-2   0.00554
## 3 fixed  Walkin…  0.00894   0.00666     1.34  4275. 1.79e-  1 -4.11e-3   0.0220 
## 4 fixed  Age_st…  0.0129    0.00673     1.91  4275. 5.56e-  2 -3.10e-4   0.0261 
## 5 fixed  Textur…  0.0902    0.00386    23.4   4275. 1.10e-113  8.26e-2   0.0978 
## 6 fixed  Trial_… -0.0215    0.0126     -1.71  4275. 8.79e-  2 -4.61e-2   0.00319

model1_std <- lmer(scale(propLookDoor) ~ scale(Crawling_group) + scale(Walking_group) + 
                   scale(Age_standardized) + scale(Texture_side) + 
                   scale(Trial_num_rescaled) + (1|Subject), 
                   data = data_clean)

model1_std_results <- tidy(model1_std, effects = "fixed")
# print("Standardized coefficients:")
# print(model1_std_results)

coefs <- model1_results

# Does the walking group differ from the average of pre-walking + crawling groups?
walking_effect <- coefs[coefs$term == "Walking_group", ]
walking_p_one_tailed <- walking_effect$p.value / 2
cat("Walking group effect: β =", round(walking_effect$estimate, 4), 
    ", one-tailed p =", round(walking_p_one_tailed, 4), "\n")

## Walking group effect: β = 0.0089 , one-tailed p = 0.0896

# Does the crawling group differ from the average of pre-crawling group?
age_effect <- coefs[coefs$term == "Age_standardized", ]
age_p_one_tailed <- age_effect$p.value / 2
cat("Age effect: β =", round(age_effect$estimate, 4), 
    ", one-tailed p =", round(age_p_one_tailed, 4), "\n")

## Age effect: β = 0.0129 , one-tailed p = 0.0278

# Texture side effect (expecting positive when high SF on door side)
texture_effect <- coefs[coefs$term == "Texture_side", ]
texture_p_one_tailed <- texture_effect$p.value / 2
cat("Texture side effect: β =", round(texture_effect$estimate, 4), 
    ", one-tailed p =", round(texture_p_one_tailed, 4), "\n")

## Texture side effect: β = 0.0902 , one-tailed p = 0

# Crawling group effect (expecting no effect)
crawling_effect <- coefs[coefs$term == "Crawling_group", ]
crawling_p_one_tailed <- crawling_effect$p.value / 2
cat("Crawling group effect: β =", round(crawling_effect$estimate, 4), 
    ", one-tailed p =", round(crawling_effect$p.value, 4), "\n")

## Crawling group effect: β = -0.0047 , one-tailed p = 0.3682

1.2 Additional Analysis 2 from MAVEW preregistration: proportion looking to high SF

# Prop. Looking to High SF ~ Crawling_group + Walking_group + Age + Door_side + Trial_num + (1|Subject)
model2 <- lmer(propLookHighSF ~ Crawling_group + Walking_group + Age_standardized + 
               Door_side + Trial_num_rescaled + (1|Subject), 
               data = data_clean)
summary(model2)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookHighSF ~ Crawling_group + Walking_group + Age_standardized +  
##     Door_side + Trial_num_rescaled + (1 | Subject)
##    Data: data_clean
## 
## REML criterion at convergence: 448.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.43257 -0.65406  0.08865  0.75486  1.80041 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  Subject  (Intercept) 0.0009848 0.03138 
##  Residual             0.0634417 0.25188 
## Number of obs: 4281, groups:  Subject, 312
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)         5.956e-01  8.354e-03  1.296e+03  71.297   <2e-16 ***
## Crawling_group      6.294e-03  5.791e-03  2.630e+02   1.087    0.278    
## Walking_group      -1.641e-03  7.375e-03  2.647e+02  -0.222    0.824    
## Age_standardized   -7.524e-03  7.441e-03  2.763e+02  -1.011    0.313    
## Door_side          -6.144e-03  3.851e-03  3.986e+03  -1.596    0.111    
## Trial_num_rescaled -1.912e-02  1.254e-02  4.027e+03  -1.524    0.128    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Crwln_ Wlkng_ Ag_stn Dor_sd
## Crawlng_grp -0.277                            
## Walking_grp  0.329  0.064                     
## Ag_stndrdzd -0.121 -0.441 -0.722              
## Door_side   -0.011  0.003  0.000 -0.002       
## Trl_nm_rscl -0.733 -0.007  0.002  0.006  0.010

model2_results <- tidy(model2, conf.int = TRUE, effects = "fixed")
# print(model2_results)

coefs2 <- model2_results

walking_effect2 <- coefs2[coefs2$term == "Walking_group", ]
cat("Walking group effect on High SF looking: β =", round(walking_effect2$estimate, 4), 
    ", two-tailed p =", round(walking_effect2$p.value, 4), "\n")

## Walking group effect on High SF looking: β = -0.0016 , two-tailed p = 0.8241

crawling_effect2 <- coefs2[coefs2$term == "Crawling_group", ]
cat("Crawling group effect on High SF looking: β =", round(crawling_effect2$estimate, 4), 
    ", two-tailed p =", round(crawling_effect2$p.value, 4), "\n")

## Crawling group effect on High SF looking: β = 0.0063 , two-tailed p = 0.2781

1.3 Visualizations

p1 <- ggplot(data_clean, aes(x = Age, y = propLookDoor, color = factor(Locomotor_group))) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", se = TRUE) +
  scale_color_manual(name = "Locomotor Group", 
                     labels = c("-1" = "Pre-locomoting", "0" = "Crawling", "1" = "Walking"),
                     values = c("-1" = "#0173B2",   # Blue
                                "0" = "#DE8F05",    # Orange  
                                "1" = "#CC78BC")) + # Pink/Purple
  labs(title = "Proportion Looking to Door by Age and Locomotor Group",
       x = "Age (days)", y = "Proportion Looking to Door") +
  theme_minimal()
print(p1)

p2 <- ggplot(data_clean, aes(x = factor(Texture_side), y = propLookDoor)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2, width = 0.2) +
  scale_x_discrete(labels = c("-1" = "High SF on Painting Side", "1" = "High SF on Door Side")) +
  labs(title = "Proportion Looking to Door by Texture Side",
       x = "Texture Side Condition", y = "Proportion Looking to Door") +
  theme_minimal()
print(p2)

p3 <- ggplot(data_clean, aes(x = Trial_num_rescaled, y = propLookDoor)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(title = "Proportion Looking to Door Across Trials",
       x = "Trial Number (rescaled 0-1)", y = "Proportion Looking to Door") +
  theme_minimal()
print(p3)

2. Not preregistered analyses

2.1 Preregistered 1.1 without age

model_no_age <- lmer(propLookDoor ~ Crawling_group + Walking_group + 
                     Texture_side + Trial_num_rescaled + (1|Subject), 
                     data = data_clean)

summary(model_no_age)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookDoor ~ Crawling_group + Walking_group + Texture_side +  
##     Trial_num_rescaled + (1 | Subject)
##    Data: data_clean
## 
## REML criterion at convergence: 404.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.55621 -0.70240  0.00051  0.73371  2.34173 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 0.00000  0.0000  
##  Residual             0.06377  0.2525  
## Number of obs: 4281, groups:  Subject, 312
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)         5.389e-01  7.978e-03  4.276e+03  67.558  < 2e-16 ***
## Crawling_group     -3.323e-04  4.701e-03  4.276e+03  -0.071   0.9436    
## Walking_group       1.812e-02  4.621e-03  4.276e+03   3.921 8.95e-05 ***
## Texture_side        9.023e-02  3.863e-03  4.276e+03  23.358  < 2e-16 ***
## Trial_num_rescaled -2.163e-02  1.258e-02  4.276e+03  -1.720   0.0856 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Crwln_ Wlkng_ Txtr_s
## Crawlng_grp -0.352                     
## Walking_grp  0.333 -0.405              
## Texture_sid  0.034  0.000  0.004       
## Trl_nm_rscl -0.770 -0.004  0.010 -0.039
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

no_age_results <- tidy(model_no_age, conf.int = TRUE, effects = "fixed")
# print("Results without age predictor:")
# print(no_age_results)

walking_effect_no_age <- no_age_results[no_age_results$term == "Walking_group", ]
walking_p_one_tailed <- walking_effect_no_age$p.value / 2

cat("With age included: β = 0.0089, one-tailed p = 0.0896\n")

## With age included: β = 0.0089, one-tailed p = 0.0896

cat("Without age:       β =", round(walking_effect_no_age$estimate, 4), 
    ", one-tailed p =", round(walking_p_one_tailed, 4), "\n")

## Without age:       β = 0.0181 , one-tailed p = 0

2.2 Preregistered 1.1 with walking no crawling

model1_walking_only <- lmer(propLookDoor ~ Walking_group + Age_standardized + 
               Texture_side + Trial_num_rescaled + (1|Subject), 
               data = data_clean)
summary(model1_walking_only)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookDoor ~ Walking_group + Age_standardized + Texture_side +  
##     Trial_num_rescaled + (1 | Subject)
##    Data: data_clean
## 
## REML criterion at convergence: 400.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.56642 -0.70131  0.00003  0.73984  2.36841 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 0.00000  0.0000  
##  Residual             0.06373  0.2524  
## Number of obs: 4281, groups:  Subject, 312
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)         5.353e-01  7.742e-03  4.276e+03  69.135   <2e-16 ***
## Walking_group       9.314e-03  6.644e-03  4.276e+03   1.402   0.1610    
## Age_standardized    1.024e-02  6.054e-03  4.276e+03   1.691   0.0909 .  
## Texture_side        9.021e-02  3.861e-03  4.276e+03  23.361   <2e-16 ***
## Trial_num_rescaled -2.154e-02  1.257e-02  4.276e+03  -1.713   0.0867 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Wlkng_ Ag_stn Txtr_s
## Walking_grp  0.341                     
## Ag_stndrdzd -0.265 -0.772              
## Texture_sid  0.036  0.005 -0.003       
## Trl_nm_rscl -0.796  0.002  0.004 -0.039
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

2.3 Subject-level locomotor group effect

subject_data <- data_clean %>%
  group_by(Subject) %>%
  summarise(
    propLookDoor_mean = mean(propLookDoor, na.rm = TRUE),
    Age_standardized = first(Age_standardized),  # Same for all trials
    Walking_group = first(Walking_group),
    Crawling_group = first(Crawling_group),
    Locomotor_group = first(Locomotor_group),
    n_trials = n(),
    .groups = 'drop'
  )

# no random effects since one obs per subject
model_subject <- lm(propLookDoor_mean ~ Walking_group + Crawling_group + Age_standardized, 
                    data = subject_data)

summary(model_subject)

## 
## Call:
## lm(formula = propLookDoor_mean ~ Walking_group + Crawling_group + 
##     Age_standardized, data = subject_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34582 -0.03365  0.00363  0.03194  0.29075 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.523940   0.004542 115.347   <2e-16 ***
## Walking_group     0.007948   0.006293   1.263   0.2075    
## Crawling_group   -0.003351   0.004913  -0.682   0.4958    
## Age_standardized  0.014487   0.006217   2.330   0.0205 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06239 on 308 degrees of freedom
## Multiple R-squared:  0.09641,    Adjusted R-squared:  0.0876 
## F-statistic: 10.95 on 3 and 308 DF,  p-value: 7.445e-07

# subject_results <- tidy(model_subject, conf.int = TRUE)
# print(subject_results)

2.4 Modeling navigation experience as continuous

2.4.1 Walking experience as continuous variable only

# walking experience as continuous variable
walking_data <- data_clean[data_clean$Locomotor_group == 1, ]
nav_model <- lmer(propLookDoor ~ Nav_experience_rescaled + Age_standardized + 
                    Texture_side + Trial_num_rescaled + (1|Subject), 
                    data = walking_data)
summary(nav_model)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## propLookDoor ~ Nav_experience_rescaled + Age_standardized + Texture_side +  
##     Trial_num_rescaled + (1 | Subject)
##    Data: walking_data
## 
## REML criterion at convergence: 40.5
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.64641 -0.65035 -0.00337  0.70870  2.29975 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 0.00000  0.000   
##  Residual             0.05905  0.243   
## Number of obs: 1269, groups:  Subject, 100
## 
## Fixed effects:
##                           Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)              5.548e-01  1.853e-02  1.264e+03  29.945   <2e-16 ***
## Nav_experience_rescaled -1.367e-02  2.857e-02  1.264e+03  -0.479    0.632    
## Age_standardized         8.468e-03  1.042e-02  1.264e+03   0.813    0.416    
## Texture_side             8.272e-02  6.830e-03  1.264e+03  12.112   <2e-16 ***
## Trial_num_rescaled      -3.364e-02  2.222e-02  1.264e+03  -1.514    0.130    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Nv_xp_ Ag_stn Txtr_s
## Nv_xprnc_rs -0.280                     
## Ag_stndrdzd -0.671  0.012              
## Texture_sid  0.032  0.007 -0.003       
## Trl_nm_rscl -0.590  0.009  0.005 -0.047
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

ggplot(walking_data, aes(x = Nav_experience, y = propLookDoor)) +
    geom_point(alpha = 0.3) +
    geom_smooth(method = "lm", se = TRUE) +
    labs(title = "Door Looking by Navigation Experience (Walking Infants Only)",
         x = "Walking Experience (days)", y = "Proportion Looking to Door") +
    theme_minimal()

2.4.2 Locomoting experience as continuous variable

# locomoting exp as continuous variable
data$CrawlExp_continuous <- ifelse(data$Locomotor_group == -1, 0,  # Pre-walkers = 0
                                  ifelse(is.na(data$CrawlExp), 0,  # Missing = 0  
                                         data$CrawlExp))           # Actual days

data$CrawlExp_rescaled <- data$CrawlExp_continuous / max(data$CrawlExp_continuous, na.rm = TRUE)
data_clean <- data[!data$ExcludeTrial, ]
cat("Full sample size:", nrow(data_clean), "trials from", 
    length(unique(data_clean$Subject)), "subjects\n")

## Full sample size: 4281 trials from 312 subjects

cat("Crawling experience distribution:\n")

## Crawling experience distribution:

print(summary(data_clean$CrawlExp_continuous))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   27.00   63.89  111.00  370.00

crawl_exp_full_model <- lmer(propLookDoor ~ CrawlExp_rescaled + Age_standardized + 
                            Texture_side + Trial_num_rescaled + (1|Subject), 
                            data = data_clean)

summary(crawl_exp_full_model)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookDoor ~ CrawlExp_rescaled + Age_standardized + Texture_side +  
##     Trial_num_rescaled + (1 | Subject)
##    Data: data_clean
## 
## REML criterion at convergence: 400.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.56388 -0.69587 -0.00092  0.74069  2.38431 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 0.00000  0.0000  
##  Residual             0.06376  0.2525  
## Number of obs: 4281, groups:  Subject, 312
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)         5.314e-01  8.046e-03  4.276e+03  66.047   <2e-16 ***
## CrawlExp_rescaled   1.088e-03  1.954e-02  4.276e+03   0.056   0.9556    
## Age_standardized    1.668e-02  4.285e-03  4.276e+03   3.894   0.0001 ***
## Texture_side        9.018e-02  3.862e-03  4.276e+03  23.350   <2e-16 ***
## Trial_num_rescaled -2.158e-02  1.257e-02  4.276e+03  -1.716   0.0863 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) CrwlE_ Ag_stn Txtr_s
## CrwlExp_rsc -0.426                     
## Ag_stndrdzd  0.185 -0.439              
## Texture_sid  0.033  0.001  0.001       
## Trl_nm_rscl -0.769  0.005  0.006 -0.039
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

crawl_full_results <- tidy(crawl_exp_full_model, conf.int = TRUE, effects = "fixed")
# print("Crawling experience results (full sample):")
# print(crawl_full_results)

crawl_full_effect <- crawl_full_results[crawl_full_results$term == "CrawlExp_rescaled", ]
cat("Crawling experience effect: β =", round(crawl_full_effect$estimate, 4), 
    ", p =", round(crawl_full_effect$p.value, 4), "\n")

## Crawling experience effect: β = 0.0011 , p = 0.9556

ggplot(data_clean, aes(x = CrawlExp_continuous, y = propLookDoor)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(title = "Door Looking by Crawling Experience (All Babies)",
       x = "Crawling Experience (days)", y = "Proportion Looking to Door") +
  theme_minimal()

2.5 Does door looking preference still exist?

2.5.1 Walking group

walking_only <- data_clean[data_clean$Locomotor_group == 1, ]

cat("Walking group sample size:", nrow(walking_only), "trials from", 
    length(unique(walking_only$Subject)), "subjects\n")

## Walking group sample size: 1269 trials from 100 subjects

walking_chance_test <- t.test(walking_only$propLookDoor, mu = 0.5)
print(walking_chance_test)

## 
##  One Sample t-test
## 
## data:  walking_only$propLookDoor
## t = 6.2509, df = 1268, p-value = 5.565e-10
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.5308817 0.5591328
## sample estimates:
## mean of x 
## 0.5450073

cat("\nWalking group door looking:\n")

## 
## Walking group door looking:

cat("Mean proportion:", round(mean(walking_only$propLookDoor), 4), "\n")

## Mean proportion: 0.545

cat("Standard deviation:", round(sd(walking_only$propLookDoor), 4), "\n")

## Standard deviation: 0.2565

cat("95% CI:", round(walking_chance_test$conf.int[1], 4), "to", 
    round(walking_chance_test$conf.int[2], 4), "\n")

## 95% CI: 0.5309 to 0.5591

cohens_d <- (mean(walking_only$propLookDoor) - 0.5) / sd(walking_only$propLookDoor)
cat("Effect size (Cohen's d):", round(cohens_d, 4), "\n")

## Effect size (Cohen's d): 0.1755

# subject level for completeness
walking_subject_means <- walking_only %>%
  group_by(Subject) %>%
  summarise(propLookDoor_mean = mean(propLookDoor), .groups = 'drop')

subject_chance_test <- t.test(walking_subject_means$propLookDoor_mean, mu = 0.5)
cat("\nSubject-level test against chance:\n")

## 
## Subject-level test against chance:

cat("Mean:", round(mean(walking_subject_means$propLookDoor_mean), 4), 
    ", p =", round(subject_chance_test$p.value, 4), "\n")

## Mean: 0.5467 , p = 0

2.5.2 Crawling group

crawling_only <- data_clean[data_clean$Locomotor_group == 0, ]

cat("Crawling group sample size:", nrow(crawling_only), "trials from", 
    length(unique(crawling_only$Subject)), "subjects\n")

## Crawling group sample size: 1814 trials from 117 subjects

crawling_chance_test <- t.test(crawling_only$propLookDoor, mu = 0.5)
print(crawling_chance_test)

## 
##  One Sample t-test
## 
## data:  crawling_only$propLookDoor
## t = 1.529, df = 1813, p-value = 0.1265
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.4973085 0.5217288
## sample estimates:
## mean of x 
## 0.5095187

cat("\nCrawling group door looking:\n")

## 
## Crawling group door looking:

cat("Mean proportion:", round(mean(crawling_only$propLookDoor), 4), "\n")

## Mean proportion: 0.5095

cat("Standard deviation:", round(sd(crawling_only$propLookDoor), 4), "\n")

## Standard deviation: 0.2652

cat("95% CI:", round(crawling_chance_test$conf.int[1], 4), "to", 
    round(crawling_chance_test$conf.int[2], 4), "\n")

## 95% CI: 0.4973 to 0.5217

cohens_d <- (mean(crawling_only$propLookDoor) - 0.5) / sd(crawling_only$propLookDoor)
cat("Effect size (Cohen's d):", round(cohens_d, 4), "\n")

## Effect size (Cohen's d): 0.0359

crawling_subject_means <- crawling_only %>%
  group_by(Subject) %>%
  summarise(propLookDoor_mean = mean(propLookDoor), .groups = 'drop')

subject_chance_test <- t.test(crawling_subject_means$propLookDoor_mean, mu = 0.5)
cat("\nSubject-level test against chance:\n")

## 
## Subject-level test against chance:

cat("Mean:", round(mean(crawling_subject_means$propLookDoor_mean), 4), 
    ", p =", round(subject_chance_test$p.value, 4), "\n")

## Mean: 0.5092 , p = 0.0224

2.5.3 Pre-locomoting group

prewalk_only <- data_clean[data_clean$Locomotor_group == -1, ]
cat("Pre-walking group sample size:", nrow(prewalk_only), "trials from", 
    length(unique(prewalk_only$Subject)), "subjects\n")

## Pre-walking group sample size: 1198 trials from 95 subjects

prewalk_chance_test <- t.test(prewalk_only$propLookDoor, mu = 0.5)
print(prewalk_chance_test)

## 
##  One Sample t-test
## 
## data:  prewalk_only$propLookDoor
## t = 1.2482, df = 1197, p-value = 0.2122
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.4941409 0.5263520
## sample estimates:
## mean of x 
## 0.5102464

cat("Pre-walking door looking mean:", round(mean(prewalk_only$propLookDoor), 4), "\n")

## Pre-walking door looking mean: 0.5102

cohens_d_prewalk <- (mean(prewalk_only$propLookDoor) - 0.5) / sd(prewalk_only$propLookDoor)
cat("Effect size (Cohen's d):", round(cohens_d_prewalk, 4), "\n")

## Effect size (Cohen's d): 0.0361

3. Data quality checks

3.1 Individual differences? Split half prediction

# library(dplyr)
# 
# comprehensive_split_half <- function(data, max_combinations_per_subject = 1000) {
#   subject_trial_counts <- data %>%
#     group_by(Subject) %>%
#     summarise(n_trials = n(), .groups = 'drop') %>%
#     filter(n_trials >= 4)
#   
#   cat("Subjects with not fewer than 4 trials: ", nrow(subject_trial_counts))
#   
#   # get all split combinations for one subject
#   get_split_combinations <- function(n_trials, max_combinations) {
#     half_size <- floor(n_trials / 2)
#     
#     # all possible combinations
#     all_combinations <- combn(1:n_trials, half_size, simplify = FALSE)
#     
#     # sample if too many combinations
#     if(length(all_combinations) > max_combinations) {
#       sampled_indices <- sample(1:length(all_combinations), max_combinations)
#       all_combinations <- all_combinations[sampled_indices]
#     }
#     
#     return(all_combinations)
#   }
#   
#   subject_splits <- list()
#   for(i in 1:nrow(subject_trial_counts)) {
#     subject_id <- subject_trial_counts$Subject[i]
#     n_trials <- subject_trial_counts$n_trials[i]
#     
#     subject_splits[[as.character(subject_id)]] <- get_split_combinations(n_trials, max_combinations_per_subject)
#   }
#   
#   # total number of split configurations to test
#   n_configurations <- prod(sapply(subject_splits, length))
#   max_configs_to_test <- 10000  # Limit for computational feasibility
#   
#   if(n_configurations > max_configs_to_test) {
#     cat("Sampling ", max_configs_to_test, "\n")
#     n_to_sample <- max_configs_to_test
#   } else {
#     cat("Testing", n_configurations, "total split configurations\n")
#     n_to_sample <- n_configurations
#   }
#   
#   correlations <- numeric(n_to_sample)
#   
#   for(config in 1:n_to_sample) {
#     half1_means <- numeric(length(subject_splits))
#     half2_means <- numeric(length(subject_splits))
#     
#     # for each subject in this configuration
#     for(j in 1:length(subject_splits)) {
#       subject_id <- names(subject_splits)[j]
#       subject_data <- data[data$Subject == subject_id, ]
#       
#       # randomly sample one split for this subject in this configuration
#       split_options <- subject_splits[[j]]
#       chosen_split <- sample(split_options, 1)[[1]]
#       
#       # calculate half means
#       half1_indices <- chosen_split
#       half2_indices <- setdiff(1:nrow(subject_data), half1_indices)
#       
#       half1_means[j] <- mean(subject_data$propLookDoor[half1_indices])
#       half2_means[j] <- mean(subject_data$propLookDoor[half2_indices])
#     }
#     
#     # calculate correlation for this configuration
#     correlations[config] <- cor(half1_means, half2_means, use = "complete.obs")
#     
#     if(config %% 1000 == 0) cat("Completed", config, "configurations\n")
#   }
#   
#   return(correlations)
# }
# 
# real_correlations <- comprehensive_split_half(data_clean)
# 
# mean_real_correlation <- mean(real_correlations, na.rm = TRUE)
# cat("Mean correlation across all split configurations:", round(mean_real_correlation, 3), "\n")
# cat("Range:", round(min(real_correlations, na.rm = TRUE), 3), "to", 
#     round(max(real_correlations, na.rm = TRUE), 3), "\n")
# 
# cat("\nGenerating null distribution...\n")
# n_null_iterations <- 1000
# null_correlations <- numeric(n_null_iterations)
# 
# subject_data_for_null <- data_clean %>%
#   group_by(Subject) %>%
#   summarise(
#     n_trials = n(),
#     trials = list(propLookDoor),
#     .groups = 'drop'
#   ) %>%
#   filter(n_trials >= 4)
# 
# for(i in 1:n_null_iterations) {
#   half1_means <- numeric(nrow(subject_data_for_null))
#   half2_means <- numeric(nrow(subject_data_for_null))
#   
#   for(j in 1:nrow(subject_data_for_null)) {
#     trials <- subject_data_for_null$trials[[j]]
#     n_trials <- length(trials)
#     half_size <- floor(n_trials / 2)
#     
#     # random assignment
#     shuffled_indices <- sample(1:n_trials)
#     half1_indices <- shuffled_indices[1:half_size]
#     half2_indices <- shuffled_indices[(half_size + 1):n_trials]
#     
#     half1_means[j] <- mean(trials[half1_indices])
#     half2_means[j] <- mean(trials[half2_indices])
#   }
#   
#   null_correlations[i] <- cor(half1_means, half2_means)
# }
# 
# cat("Mean null correlation:", round(mean(null_correlations), 3), "\n")
# cat("P-value (two-tailed):", 
#     mean(abs(null_correlations) >= abs(mean_real_correlation)), "\n")
# 
# par(mfrow = c(2, 1))
# 
# # distribution
# hist(real_correlations, main = "Distribution of Real Split-Half Correlations", 
#      xlab = "Correlation", breaks = 30, col = "lightblue")
# abline(v = mean_real_correlation, col = "red", lwd = 2)
# 
# hist(null_correlations, main = "Real vs. Null Correlations", 
#      xlab = "Correlation", breaks = 30, col = "lightgray", 
#      xlim = range(c(real_correlations, null_correlations)))
# abline(v = mean_real_correlation, col = "red", lwd = 2, 
#        label = paste("Real =", round(mean_real_correlation, 3)))
# abline(v = mean(null_correlations), col = "blue", lwd = 2)
# legend("topright", c("Real correlation", "Null mean"), 
#        col = c("red", "blue"), lwd = 2)
# 
# par(mfrow = c(1, 1))

3.2 Is the walking group effect stable to individual noise at all? Bootstrapping

# library(boot)
# walking_effect_boot <- function(data, indices) {
#   boot_data <- data[indices, ]
#   walking_mean <- mean(boot_data$propLookDoor[boot_data$Walking_group == 1])
#   other_mean <- mean(boot_data$propLookDoor[boot_data$Walking_group == -1])
#   return(walking_mean - other_mean)
# }
# boot_result <- boot(data_clean, walking_effect_boot, R = 1000)
# boot.ci(boot_result)

Do locomoting babies look more to doorways? Full sample analysis

Haoyu Du

2025-06-24

Data Loading and Preparation

1. Full sample preregistered analyses

1.1 Additional Analysis 1 from MAVEW preregistration: proportion looking to door

1.2 Additional Analysis 2 from MAVEW preregistration: proportion looking to high SF

1.3 Visualizations

2. Not preregistered analyses

2.1 Preregistered 1.1 without age

2.2 Preregistered 1.1 with walking no crawling

2.3 Subject-level locomotor group effect

2.4 Modeling navigation experience as continuous

2.4.1 Walking experience as continuous variable only

2.4.2 Locomoting experience as continuous variable

2.5 Does door looking preference still exist?

2.5.1 Walking group

2.5.2 Crawling group

2.5.3 Pre-locomoting group

3. Data quality checks

3.1 Individual differences? Split half prediction

3.2 Is the walking group effect stable to individual noise at all? Bootstrapping