data <- read.csv("combined_INA_experiment_1.csv")
cat("Number of unique subjects:", length(unique(data$Subject)), "\n")
## Number of unique subjects: 312
subjects_over_1 <- unique(data$Subject[data$Trial_num > 1])
print("Subjects with trial numbers > 1:")
## [1] "Subjects with trial numbers > 1:"
print(subjects_over_1)
## [1] "SAX_MAVEW_1021" "SAX_MAVEW_2004" "SAX_MAVEW_2052" "SAX_MAVEW_2094"
data$Subject <- as.factor(data$Subject)
data$Texture <- as.factor(data$Texture)
data$Direction <- as.factor(data$Direction)
data$Gender <- as.factor(data$Gender)
data$ExcludeTrial <- as.logical(data$ExcludeTrial)
# Locomotor_group -1=pre-walking, 0=crawling, 1=walking
# Crawling_group: Has crawling experience (crawlers + walkers) vs. pre-walking
data$Crawling_group <- ifelse(data$Locomotor_group >= 0, 1, -1)
# Walking_group: Has walking experience (walkers only) vs. others
data$Walking_group <- ifelse(data$Locomotor_group == 1, 1, -1)
# standardize age (mean zero, unit variance)
data$Age_standardized <- scale(data$Age)[,1]
# rescale trial number: only for subjects with >16 trials (trial numbers > 1)
data$Trial_num_rescaled <- data$Trial_num # Start with original values
subjects_to_rescale <- c("SAX_MAVEW_2004", "SAX_MAVEW_2052", "SAX_MAVEW_2094", "SAX_MAVEW_1021")
for(subject in subjects_to_rescale) {
subject_rows <- data$Subject == subject
if(any(subject_rows)) {
subject_trials <- data$Trial_num[subject_rows]
data$Trial_num_rescaled[subject_rows] <- (subject_trials - min(subject_trials)) /
(max(subject_trials) - min(subject_trials))
}
}
# days since walking onset, rescaled 0-1
# for subjects without walking experience, set to 0
data$Nav_experience <- ifelse(is.na(data$WalkExp), 0, data$WalkExp)
data$Nav_experience_rescaled <- data$Nav_experience / max(data$Nav_experience, na.rm = TRUE)
# door position (left vs right)
data$Door_side <- ifelse(data$Direction == "R", 1, -1) # Door on right=1, left=-1
# if Texture_side = 1: High SF texture is on door side -> propLookHighSF = propLookDoor
# if Texture_side = -1: High SF texture is on painting side -> propLookHighSF = 1 - propLookDoor
data$propLookHighSF <- ifelse(data$Texture_side == 1,
data$propLookDoor,
1 - data$propLookDoor)
data_clean <- data[!data$ExcludeTrial, ]
cat("After excluding trials:", nrow(data_clean), "observations from",
length(unique(data_clean$Subject)), "subjects\n")
## After excluding trials: 4281 observations from 312 subjects
summary_vars <- data_clean %>%
select(propLookDoor, Age, Age_standardized, Trial_num_rescaled,
Nav_experience_rescaled, Crawling_group, Walking_group, Texture_side) %>%
summary()
print(summary_vars)
## propLookDoor Age Age_standardized Trial_num_rescaled
## Min. :0.0000 Min. :126 Min. :-1.73208 Min. :0.0000
## 1st Qu.:0.3191 1st Qu.:217 1st Qu.:-0.92423 1st Qu.:0.2000
## Median :0.5262 Median :289 Median :-0.28505 Median :0.4667
## Mean :0.5202 Mean :320 Mean :-0.01023 Mean :0.4908
## 3rd Qu.:0.7333 3rd Qu.:397 3rd Qu.: 0.67373 3rd Qu.:0.7333
## Max. :1.0000 Max. :581 Max. : 2.30719 Max. :1.0000
## Nav_experience_rescaled Crawling_group Walking_group
## Min. :0.00000 Min. :-1.0000 Min. :-1.0000
## 1st Qu.:0.00000 1st Qu.:-1.0000 1st Qu.:-1.0000
## Median :0.00000 Median : 1.0000 Median :-1.0000
## Mean :0.05114 Mean : 0.4403 Mean :-0.4071
## 3rd Qu.:0.00000 3rd Qu.: 1.0000 3rd Qu.: 1.0000
## Max. :1.00000 Max. : 1.0000 Max. : 1.0000
## Texture_side
## Min. :-1.000000
## 1st Qu.:-1.000000
## Median :-1.000000
## Mean :-0.006307
## 3rd Qu.: 1.000000
## Max. : 1.000000
# Prop. Looking to Door ~ Crawling_group + Walking_group + Age + Texture_side + Trial_num + (1|Subject)
model1 <- lmer(propLookDoor ~ Crawling_group + Walking_group + Age_standardized +
Texture_side + Trial_num_rescaled + (1|Subject),
data = data_clean)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookDoor ~ Crawling_group + Walking_group + Age_standardized +
## Texture_side + Trial_num_rescaled + (1 | Subject)
## Data: data_clean
##
## REML criterion at convergence: 408.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.57465 -0.70048 -0.00145 0.73547 2.35598
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 0.00000 0.0000
## Residual 0.06373 0.2525
## Number of obs: 4281, groups: Subject, 312
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.372e-01 8.028e-03 4.275e+03 66.914 <2e-16 ***
## Crawling_group -4.702e-03 5.224e-03 4.275e+03 -0.900 0.3682
## Walking_group 8.943e-03 6.657e-03 4.275e+03 1.343 0.1792
## Age_standardized 1.289e-02 6.731e-03 4.275e+03 1.914 0.0556 .
## Texture_side 9.020e-02 3.862e-03 4.275e+03 23.360 <2e-16 ***
## Trial_num_rescaled -2.146e-02 1.257e-02 4.275e+03 -1.707 0.0879 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Crwln_ Wlkng_ Ag_stn Txtr_s
## Crawlng_grp -0.264
## Walking_grp 0.312 0.062
## Ag_stndrdzd -0.115 -0.437 -0.720
## Texture_sid 0.034 0.001 0.005 -0.003
## Trl_nm_rscl -0.766 -0.007 0.002 0.007 -0.039
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
model1_results <- tidy(model1, conf.int = TRUE, effects = "fixed")
print(model1_results)
## # A tibble: 6 × 9
## effect term estimate std.error statistic df p.value conf.low conf.high
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 fixed (Inter… 0.537 0.00803 66.9 4275. 0 5.21e-1 0.553
## 2 fixed Crawli… -0.00470 0.00522 -0.900 4275. 3.68e- 1 -1.49e-2 0.00554
## 3 fixed Walkin… 0.00894 0.00666 1.34 4275. 1.79e- 1 -4.11e-3 0.0220
## 4 fixed Age_st… 0.0129 0.00673 1.91 4275. 5.56e- 2 -3.10e-4 0.0261
## 5 fixed Textur… 0.0902 0.00386 23.4 4275. 1.10e-113 8.26e-2 0.0978
## 6 fixed Trial_… -0.0215 0.0126 -1.71 4275. 8.79e- 2 -4.61e-2 0.00319
model1_std <- lmer(scale(propLookDoor) ~ scale(Crawling_group) + scale(Walking_group) +
scale(Age_standardized) + scale(Texture_side) +
scale(Trial_num_rescaled) + (1|Subject),
data = data_clean)
model1_std_results <- tidy(model1_std, effects = "fixed")
# print("Standardized coefficients:")
# print(model1_std_results)
coefs <- model1_results
# Does the walking group differ from the average of pre-walking + crawling groups?
walking_effect <- coefs[coefs$term == "Walking_group", ]
walking_p_one_tailed <- walking_effect$p.value / 2
cat("Walking group effect: β =", round(walking_effect$estimate, 4),
", one-tailed p =", round(walking_p_one_tailed, 4), "\n")
## Walking group effect: β = 0.0089 , one-tailed p = 0.0896
# Does the crawling group differ from the average of pre-crawling group?
age_effect <- coefs[coefs$term == "Age_standardized", ]
age_p_one_tailed <- age_effect$p.value / 2
cat("Age effect: β =", round(age_effect$estimate, 4),
", one-tailed p =", round(age_p_one_tailed, 4), "\n")
## Age effect: β = 0.0129 , one-tailed p = 0.0278
# Texture side effect (expecting positive when high SF on door side)
texture_effect <- coefs[coefs$term == "Texture_side", ]
texture_p_one_tailed <- texture_effect$p.value / 2
cat("Texture side effect: β =", round(texture_effect$estimate, 4),
", one-tailed p =", round(texture_p_one_tailed, 4), "\n")
## Texture side effect: β = 0.0902 , one-tailed p = 0
# Crawling group effect (expecting no effect)
crawling_effect <- coefs[coefs$term == "Crawling_group", ]
crawling_p_one_tailed <- crawling_effect$p.value / 2
cat("Crawling group effect: β =", round(crawling_effect$estimate, 4),
", one-tailed p =", round(crawling_effect$p.value, 4), "\n")
## Crawling group effect: β = -0.0047 , one-tailed p = 0.3682
# Prop. Looking to High SF ~ Crawling_group + Walking_group + Age + Door_side + Trial_num + (1|Subject)
model2 <- lmer(propLookHighSF ~ Crawling_group + Walking_group + Age_standardized +
Door_side + Trial_num_rescaled + (1|Subject),
data = data_clean)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookHighSF ~ Crawling_group + Walking_group + Age_standardized +
## Door_side + Trial_num_rescaled + (1 | Subject)
## Data: data_clean
##
## REML criterion at convergence: 448.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.43257 -0.65406 0.08865 0.75486 1.80041
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 0.0009848 0.03138
## Residual 0.0634417 0.25188
## Number of obs: 4281, groups: Subject, 312
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.956e-01 8.354e-03 1.296e+03 71.297 <2e-16 ***
## Crawling_group 6.294e-03 5.791e-03 2.630e+02 1.087 0.278
## Walking_group -1.641e-03 7.375e-03 2.647e+02 -0.222 0.824
## Age_standardized -7.524e-03 7.441e-03 2.763e+02 -1.011 0.313
## Door_side -6.144e-03 3.851e-03 3.986e+03 -1.596 0.111
## Trial_num_rescaled -1.912e-02 1.254e-02 4.027e+03 -1.524 0.128
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Crwln_ Wlkng_ Ag_stn Dor_sd
## Crawlng_grp -0.277
## Walking_grp 0.329 0.064
## Ag_stndrdzd -0.121 -0.441 -0.722
## Door_side -0.011 0.003 0.000 -0.002
## Trl_nm_rscl -0.733 -0.007 0.002 0.006 0.010
model2_results <- tidy(model2, conf.int = TRUE, effects = "fixed")
# print(model2_results)
coefs2 <- model2_results
walking_effect2 <- coefs2[coefs2$term == "Walking_group", ]
cat("Walking group effect on High SF looking: β =", round(walking_effect2$estimate, 4),
", two-tailed p =", round(walking_effect2$p.value, 4), "\n")
## Walking group effect on High SF looking: β = -0.0016 , two-tailed p = 0.8241
crawling_effect2 <- coefs2[coefs2$term == "Crawling_group", ]
cat("Crawling group effect on High SF looking: β =", round(crawling_effect2$estimate, 4),
", two-tailed p =", round(crawling_effect2$p.value, 4), "\n")
## Crawling group effect on High SF looking: β = 0.0063 , two-tailed p = 0.2781
p1 <- ggplot(data_clean, aes(x = Age, y = propLookDoor, color = factor(Locomotor_group))) +
geom_point(alpha = 0.3) +
geom_smooth(method = "lm", se = TRUE) +
scale_color_manual(name = "Locomotor Group",
labels = c("-1" = "Pre-locomoting", "0" = "Crawling", "1" = "Walking"),
values = c("-1" = "#0173B2", # Blue
"0" = "#DE8F05", # Orange
"1" = "#CC78BC")) + # Pink/Purple
labs(title = "Proportion Looking to Door by Age and Locomotor Group",
x = "Age (days)", y = "Proportion Looking to Door") +
theme_minimal()
print(p1)
p2 <- ggplot(data_clean, aes(x = factor(Texture_side), y = propLookDoor)) +
geom_boxplot() +
geom_jitter(alpha = 0.2, width = 0.2) +
scale_x_discrete(labels = c("-1" = "High SF on Painting Side", "1" = "High SF on Door Side")) +
labs(title = "Proportion Looking to Door by Texture Side",
x = "Texture Side Condition", y = "Proportion Looking to Door") +
theme_minimal()
print(p2)
p3 <- ggplot(data_clean, aes(x = Trial_num_rescaled, y = propLookDoor)) +
geom_point(alpha = 0.2) +
geom_smooth(method = "lm", se = TRUE) +
labs(title = "Proportion Looking to Door Across Trials",
x = "Trial Number (rescaled 0-1)", y = "Proportion Looking to Door") +
theme_minimal()
print(p3)
model_no_age <- lmer(propLookDoor ~ Crawling_group + Walking_group +
Texture_side + Trial_num_rescaled + (1|Subject),
data = data_clean)
summary(model_no_age)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookDoor ~ Crawling_group + Walking_group + Texture_side +
## Trial_num_rescaled + (1 | Subject)
## Data: data_clean
##
## REML criterion at convergence: 404.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.55621 -0.70240 0.00051 0.73371 2.34173
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 0.00000 0.0000
## Residual 0.06377 0.2525
## Number of obs: 4281, groups: Subject, 312
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.389e-01 7.978e-03 4.276e+03 67.558 < 2e-16 ***
## Crawling_group -3.323e-04 4.701e-03 4.276e+03 -0.071 0.9436
## Walking_group 1.812e-02 4.621e-03 4.276e+03 3.921 8.95e-05 ***
## Texture_side 9.023e-02 3.863e-03 4.276e+03 23.358 < 2e-16 ***
## Trial_num_rescaled -2.163e-02 1.258e-02 4.276e+03 -1.720 0.0856 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Crwln_ Wlkng_ Txtr_s
## Crawlng_grp -0.352
## Walking_grp 0.333 -0.405
## Texture_sid 0.034 0.000 0.004
## Trl_nm_rscl -0.770 -0.004 0.010 -0.039
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
no_age_results <- tidy(model_no_age, conf.int = TRUE, effects = "fixed")
# print("Results without age predictor:")
# print(no_age_results)
walking_effect_no_age <- no_age_results[no_age_results$term == "Walking_group", ]
walking_p_one_tailed <- walking_effect_no_age$p.value / 2
cat("With age included: β = 0.0089, one-tailed p = 0.0896\n")
## With age included: β = 0.0089, one-tailed p = 0.0896
cat("Without age: β =", round(walking_effect_no_age$estimate, 4),
", one-tailed p =", round(walking_p_one_tailed, 4), "\n")
## Without age: β = 0.0181 , one-tailed p = 0
model1_walking_only <- lmer(propLookDoor ~ Walking_group + Age_standardized +
Texture_side + Trial_num_rescaled + (1|Subject),
data = data_clean)
summary(model1_walking_only)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: propLookDoor ~ Walking_group + Age_standardized + Texture_side +
## Trial_num_rescaled + (1 | Subject)
## Data: data_clean
##
## REML criterion at convergence: 400.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.56642 -0.70131 0.00003 0.73984 2.36841
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 0.00000 0.0000
## Residual 0.06373 0.2524
## Number of obs: 4281, groups: Subject, 312
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.353e-01 7.742e-03 4.276e+03 69.135 <2e-16 ***
## Walking_group 9.314e-03 6.644e-03 4.276e+03 1.402 0.1610
## Age_standardized 1.024e-02 6.054e-03 4.276e+03 1.691 0.0909 .
## Texture_side 9.021e-02 3.861e-03 4.276e+03 23.361 <2e-16 ***
## Trial_num_rescaled -2.154e-02 1.257e-02 4.276e+03 -1.713 0.0867 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Wlkng_ Ag_stn Txtr_s
## Walking_grp 0.341
## Ag_stndrdzd -0.265 -0.772
## Texture_sid 0.036 0.005 -0.003
## Trl_nm_rscl -0.796 0.002 0.004 -0.039
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
subject_data <- data_clean %>%
group_by(Subject) %>%
summarise(
propLookDoor_mean = mean(propLookDoor, na.rm = TRUE),
Age_standardized = first(Age_standardized), # Same for all trials
Walking_group = first(Walking_group),
Crawling_group = first(Crawling_group),
Locomotor_group = first(Locomotor_group),
n_trials = n(),
.groups = 'drop'
)
# no random effects since one obs per subject
model_subject <- lm(propLookDoor_mean ~ Walking_group + Crawling_group + Age_standardized,
data = subject_data)
summary(model_subject)
##
## Call:
## lm(formula = propLookDoor_mean ~ Walking_group + Crawling_group +
## Age_standardized, data = subject_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34582 -0.03365 0.00363 0.03194 0.29075
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.523940 0.004542 115.347 <2e-16 ***
## Walking_group 0.007948 0.006293 1.263 0.2075
## Crawling_group -0.003351 0.004913 -0.682 0.4958
## Age_standardized 0.014487 0.006217 2.330 0.0205 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06239 on 308 degrees of freedom
## Multiple R-squared: 0.09641, Adjusted R-squared: 0.0876
## F-statistic: 10.95 on 3 and 308 DF, p-value: 7.445e-07
# subject_results <- tidy(model_subject, conf.int = TRUE)
# print(subject_results)
walking_only <- data_clean[data_clean$Locomotor_group == 1, ]
cat("Walking group sample size:", nrow(walking_only), "trials from",
length(unique(walking_only$Subject)), "subjects\n")
## Walking group sample size: 1269 trials from 100 subjects
walking_chance_test <- t.test(walking_only$propLookDoor, mu = 0.5)
print(walking_chance_test)
##
## One Sample t-test
##
## data: walking_only$propLookDoor
## t = 6.2509, df = 1268, p-value = 5.565e-10
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
## 0.5308817 0.5591328
## sample estimates:
## mean of x
## 0.5450073
cat("\nWalking group door looking:\n")
##
## Walking group door looking:
cat("Mean proportion:", round(mean(walking_only$propLookDoor), 4), "\n")
## Mean proportion: 0.545
cat("Standard deviation:", round(sd(walking_only$propLookDoor), 4), "\n")
## Standard deviation: 0.2565
cat("95% CI:", round(walking_chance_test$conf.int[1], 4), "to",
round(walking_chance_test$conf.int[2], 4), "\n")
## 95% CI: 0.5309 to 0.5591
cohens_d <- (mean(walking_only$propLookDoor) - 0.5) / sd(walking_only$propLookDoor)
cat("Effect size (Cohen's d):", round(cohens_d, 4), "\n")
## Effect size (Cohen's d): 0.1755
# subject level for completeness
walking_subject_means <- walking_only %>%
group_by(Subject) %>%
summarise(propLookDoor_mean = mean(propLookDoor), .groups = 'drop')
subject_chance_test <- t.test(walking_subject_means$propLookDoor_mean, mu = 0.5)
cat("\nSubject-level test against chance:\n")
##
## Subject-level test against chance:
cat("Mean:", round(mean(walking_subject_means$propLookDoor_mean), 4),
", p =", round(subject_chance_test$p.value, 4), "\n")
## Mean: 0.5467 , p = 0
crawling_only <- data_clean[data_clean$Locomotor_group == 0, ]
cat("Crawling group sample size:", nrow(crawling_only), "trials from",
length(unique(crawling_only$Subject)), "subjects\n")
## Crawling group sample size: 1814 trials from 117 subjects
crawling_chance_test <- t.test(crawling_only$propLookDoor, mu = 0.5)
print(crawling_chance_test)
##
## One Sample t-test
##
## data: crawling_only$propLookDoor
## t = 1.529, df = 1813, p-value = 0.1265
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
## 0.4973085 0.5217288
## sample estimates:
## mean of x
## 0.5095187
cat("\nCrawling group door looking:\n")
##
## Crawling group door looking:
cat("Mean proportion:", round(mean(crawling_only$propLookDoor), 4), "\n")
## Mean proportion: 0.5095
cat("Standard deviation:", round(sd(crawling_only$propLookDoor), 4), "\n")
## Standard deviation: 0.2652
cat("95% CI:", round(crawling_chance_test$conf.int[1], 4), "to",
round(crawling_chance_test$conf.int[2], 4), "\n")
## 95% CI: 0.4973 to 0.5217
cohens_d <- (mean(crawling_only$propLookDoor) - 0.5) / sd(crawling_only$propLookDoor)
cat("Effect size (Cohen's d):", round(cohens_d, 4), "\n")
## Effect size (Cohen's d): 0.0359
crawling_subject_means <- crawling_only %>%
group_by(Subject) %>%
summarise(propLookDoor_mean = mean(propLookDoor), .groups = 'drop')
subject_chance_test <- t.test(crawling_subject_means$propLookDoor_mean, mu = 0.5)
cat("\nSubject-level test against chance:\n")
##
## Subject-level test against chance:
cat("Mean:", round(mean(crawling_subject_means$propLookDoor_mean), 4),
", p =", round(subject_chance_test$p.value, 4), "\n")
## Mean: 0.5092 , p = 0.0224
prewalk_only <- data_clean[data_clean$Locomotor_group == -1, ]
cat("Pre-walking group sample size:", nrow(prewalk_only), "trials from",
length(unique(prewalk_only$Subject)), "subjects\n")
## Pre-walking group sample size: 1198 trials from 95 subjects
prewalk_chance_test <- t.test(prewalk_only$propLookDoor, mu = 0.5)
print(prewalk_chance_test)
##
## One Sample t-test
##
## data: prewalk_only$propLookDoor
## t = 1.2482, df = 1197, p-value = 0.2122
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
## 0.4941409 0.5263520
## sample estimates:
## mean of x
## 0.5102464
cat("Pre-walking door looking mean:", round(mean(prewalk_only$propLookDoor), 4), "\n")
## Pre-walking door looking mean: 0.5102
cohens_d_prewalk <- (mean(prewalk_only$propLookDoor) - 0.5) / sd(prewalk_only$propLookDoor)
cat("Effect size (Cohen's d):", round(cohens_d_prewalk, 4), "\n")
## Effect size (Cohen's d): 0.0361
# library(dplyr)
#
# comprehensive_split_half <- function(data, max_combinations_per_subject = 1000) {
# subject_trial_counts <- data %>%
# group_by(Subject) %>%
# summarise(n_trials = n(), .groups = 'drop') %>%
# filter(n_trials >= 4)
#
# cat("Subjects with not fewer than 4 trials: ", nrow(subject_trial_counts))
#
# # get all split combinations for one subject
# get_split_combinations <- function(n_trials, max_combinations) {
# half_size <- floor(n_trials / 2)
#
# # all possible combinations
# all_combinations <- combn(1:n_trials, half_size, simplify = FALSE)
#
# # sample if too many combinations
# if(length(all_combinations) > max_combinations) {
# sampled_indices <- sample(1:length(all_combinations), max_combinations)
# all_combinations <- all_combinations[sampled_indices]
# }
#
# return(all_combinations)
# }
#
# subject_splits <- list()
# for(i in 1:nrow(subject_trial_counts)) {
# subject_id <- subject_trial_counts$Subject[i]
# n_trials <- subject_trial_counts$n_trials[i]
#
# subject_splits[[as.character(subject_id)]] <- get_split_combinations(n_trials, max_combinations_per_subject)
# }
#
# # total number of split configurations to test
# n_configurations <- prod(sapply(subject_splits, length))
# max_configs_to_test <- 10000 # Limit for computational feasibility
#
# if(n_configurations > max_configs_to_test) {
# cat("Sampling ", max_configs_to_test, "\n")
# n_to_sample <- max_configs_to_test
# } else {
# cat("Testing", n_configurations, "total split configurations\n")
# n_to_sample <- n_configurations
# }
#
# correlations <- numeric(n_to_sample)
#
# for(config in 1:n_to_sample) {
# half1_means <- numeric(length(subject_splits))
# half2_means <- numeric(length(subject_splits))
#
# # for each subject in this configuration
# for(j in 1:length(subject_splits)) {
# subject_id <- names(subject_splits)[j]
# subject_data <- data[data$Subject == subject_id, ]
#
# # randomly sample one split for this subject in this configuration
# split_options <- subject_splits[[j]]
# chosen_split <- sample(split_options, 1)[[1]]
#
# # calculate half means
# half1_indices <- chosen_split
# half2_indices <- setdiff(1:nrow(subject_data), half1_indices)
#
# half1_means[j] <- mean(subject_data$propLookDoor[half1_indices])
# half2_means[j] <- mean(subject_data$propLookDoor[half2_indices])
# }
#
# # calculate correlation for this configuration
# correlations[config] <- cor(half1_means, half2_means, use = "complete.obs")
#
# if(config %% 1000 == 0) cat("Completed", config, "configurations\n")
# }
#
# return(correlations)
# }
#
# real_correlations <- comprehensive_split_half(data_clean)
#
# mean_real_correlation <- mean(real_correlations, na.rm = TRUE)
# cat("Mean correlation across all split configurations:", round(mean_real_correlation, 3), "\n")
# cat("Range:", round(min(real_correlations, na.rm = TRUE), 3), "to",
# round(max(real_correlations, na.rm = TRUE), 3), "\n")
#
# cat("\nGenerating null distribution...\n")
# n_null_iterations <- 1000
# null_correlations <- numeric(n_null_iterations)
#
# subject_data_for_null <- data_clean %>%
# group_by(Subject) %>%
# summarise(
# n_trials = n(),
# trials = list(propLookDoor),
# .groups = 'drop'
# ) %>%
# filter(n_trials >= 4)
#
# for(i in 1:n_null_iterations) {
# half1_means <- numeric(nrow(subject_data_for_null))
# half2_means <- numeric(nrow(subject_data_for_null))
#
# for(j in 1:nrow(subject_data_for_null)) {
# trials <- subject_data_for_null$trials[[j]]
# n_trials <- length(trials)
# half_size <- floor(n_trials / 2)
#
# # random assignment
# shuffled_indices <- sample(1:n_trials)
# half1_indices <- shuffled_indices[1:half_size]
# half2_indices <- shuffled_indices[(half_size + 1):n_trials]
#
# half1_means[j] <- mean(trials[half1_indices])
# half2_means[j] <- mean(trials[half2_indices])
# }
#
# null_correlations[i] <- cor(half1_means, half2_means)
# }
#
# cat("Mean null correlation:", round(mean(null_correlations), 3), "\n")
# cat("P-value (two-tailed):",
# mean(abs(null_correlations) >= abs(mean_real_correlation)), "\n")
#
# par(mfrow = c(2, 1))
#
# # distribution
# hist(real_correlations, main = "Distribution of Real Split-Half Correlations",
# xlab = "Correlation", breaks = 30, col = "lightblue")
# abline(v = mean_real_correlation, col = "red", lwd = 2)
#
# hist(null_correlations, main = "Real vs. Null Correlations",
# xlab = "Correlation", breaks = 30, col = "lightgray",
# xlim = range(c(real_correlations, null_correlations)))
# abline(v = mean_real_correlation, col = "red", lwd = 2,
# label = paste("Real =", round(mean_real_correlation, 3)))
# abline(v = mean(null_correlations), col = "blue", lwd = 2)
# legend("topright", c("Real correlation", "Null mean"),
# col = c("red", "blue"), lwd = 2)
#
# par(mfrow = c(1, 1))
# library(boot)
# walking_effect_boot <- function(data, indices) {
# boot_data <- data[indices, ]
# walking_mean <- mean(boot_data$propLookDoor[boot_data$Walking_group == 1])
# other_mean <- mean(boot_data$propLookDoor[boot_data$Walking_group == -1])
# return(walking_mean - other_mean)
# }
# boot_result <- boot(data_clean, walking_effect_boot, R = 1000)
# boot.ci(boot_result)