# Read the four CSV files
homeA_1st <- read.csv("HomeA-1st.csv")
homeB_1st <- read.csv("HomeB-1st.csv")
homeA_2nd <- read.csv("HomeA-2nd.csv")
homeB_2nd <- read.csv("HomeB-2nd.csv")
# Add Home and Period columns
homeA_1st$Home <- "A"
homeA_1st$Period <- 1
homeB_1st$Home <- "B"
homeB_1st$Period <- 1
homeA_2nd$Home <- "A"
homeA_2nd$Period <- 2
homeB_2nd$Home <- "B"
homeB_2nd$Period <- 2
df <- bind_rows(homeA_1st, homeB_1st, homeA_2nd, homeB_2nd) %>%
select(Tester.ID, Total.duration..seconds., Home, Period) %>%
distinct() %>%
rename(Subject = Tester.ID, Score = Total.duration..seconds.)
df <- arrange(df, Subject)
print(df)
## Subject Score Home Period
## 1 302888762 72.68 A 1
## 2 302888762 27.02 B 2
## 3 456103220 21.98 A 1
## 4 456103220 28.36 B 2
## 5 457895107 41.37 B 1
## 6 457895107 54.97 A 2
## 7 460185122 68.13 B 1
## 8 460185122 30.60 A 2
## 9 461180896 73.52 A 1
## 10 461180896 30.63 B 2
## 11 462498662 69.31 A 1
## 12 462498662 19.68 B 2
## 13 462862639 130.80 A 1
## 14 462862639 27.43 B 2
## 15 463230127 56.15 B 1
## 16 463230127 13.66 A 2
## 17 463237021 119.05 A 1
## 18 463237021 49.28 B 2
## 19 463256322 86.45 A 1
## 20 463256322 30.22 B 2
## 21 463510048 116.98 B 1
## 22 463510048 33.86 A 2
## 23 463682411 100.48 B 1
## 24 463682411 17.72 A 2
## 25 464022343 124.66 A 1
## 26 464022343 49.99 B 2
## 27 464101650 44.72 B 1
## 28 464101650 24.81 A 2
## 29 464168937 101.78 B 1
## 30 464168937 11.05 A 2
## 31 464176866 38.23 B 1
## 32 464176866 65.93 A 2
## 33 464184244 18.11 A 1
## 34 464199019 129.71 B 1
## 35 464199019 72.99 A 2
## 36 464204034 68.50 A 1
## 37 464204034 47.58 B 2
## 38 464220606 35.95 B 1
## 39 464220606 27.69 A 2
## 40 464243752 133.69 B 1
## 41 464243752 51.76 A 2
## 42 464279823 58.06 B 1
## 43 464279823 63.62 A 2
## 44 464283964 66.87 B 1
## 45 464283964 119.52 A 2
## 46 464289978 153.39 A 1
## 47 464289978 33.24 B 2
## 48 464350923 83.29 A 1
## 49 464350923 26.52 B 2
## 50 464351742 5.22 A 1
## 51 464367632 25.42 A 1
## 52 464370209 123.43 A 1
## 53 464370209 36.14 B 2
## 54 464372331 50.47 A 1
## 55 464372331 27.14 B 2
## 56 464376961 54.26 A 1
## 57 464376961 29.98 B 2
## 58 464378135 139.10 A 1
## 59 464378135 47.60 B 2
## 60 464386065 54.78 B 1
## 61 464386065 56.00 A 2
## 62 464386169 76.31 B 1
## 63 464386169 69.52 A 2
## 64 464402783 219.66 A 1
## 65 464402783 39.50 B 2
## 66 464409779 68.42 A 1
## 67 464409779 26.52 B 2
## 68 464610098 38.08 A 1
# Data cleaning, keeping subjects that completed both designs
## Count how many unique Homes each subject has
subjects_complete <- df %>%
group_by(Subject) %>%
summarise(n_homes = n_distinct(Home)) %>%
filter(n_homes == 2) %>% # only subjects with both A and B
pull(Subject)
## Filter the main dataset
df_complete <- df %>% filter(Subject %in% subjects_complete)
n_distinct(df_complete$Subject) # check how many subjects remain
## [1] 32
print(df_complete)
## Subject Score Home Period
## 1 302888762 72.68 A 1
## 2 302888762 27.02 B 2
## 3 456103220 21.98 A 1
## 4 456103220 28.36 B 2
## 5 457895107 41.37 B 1
## 6 457895107 54.97 A 2
## 7 460185122 68.13 B 1
## 8 460185122 30.60 A 2
## 9 461180896 73.52 A 1
## 10 461180896 30.63 B 2
## 11 462498662 69.31 A 1
## 12 462498662 19.68 B 2
## 13 462862639 130.80 A 1
## 14 462862639 27.43 B 2
## 15 463230127 56.15 B 1
## 16 463230127 13.66 A 2
## 17 463237021 119.05 A 1
## 18 463237021 49.28 B 2
## 19 463256322 86.45 A 1
## 20 463256322 30.22 B 2
## 21 463510048 116.98 B 1
## 22 463510048 33.86 A 2
## 23 463682411 100.48 B 1
## 24 463682411 17.72 A 2
## 25 464022343 124.66 A 1
## 26 464022343 49.99 B 2
## 27 464101650 44.72 B 1
## 28 464101650 24.81 A 2
## 29 464168937 101.78 B 1
## 30 464168937 11.05 A 2
## 31 464176866 38.23 B 1
## 32 464176866 65.93 A 2
## 33 464199019 129.71 B 1
## 34 464199019 72.99 A 2
## 35 464204034 68.50 A 1
## 36 464204034 47.58 B 2
## 37 464220606 35.95 B 1
## 38 464220606 27.69 A 2
## 39 464243752 133.69 B 1
## 40 464243752 51.76 A 2
## 41 464279823 58.06 B 1
## 42 464279823 63.62 A 2
## 43 464283964 66.87 B 1
## 44 464283964 119.52 A 2
## 45 464289978 153.39 A 1
## 46 464289978 33.24 B 2
## 47 464350923 83.29 A 1
## 48 464350923 26.52 B 2
## 49 464370209 123.43 A 1
## 50 464370209 36.14 B 2
## 51 464372331 50.47 A 1
## 52 464372331 27.14 B 2
## 53 464376961 54.26 A 1
## 54 464376961 29.98 B 2
## 55 464378135 139.10 A 1
## 56 464378135 47.60 B 2
## 57 464386065 54.78 B 1
## 58 464386065 56.00 A 2
## 59 464386169 76.31 B 1
## 60 464386169 69.52 A 2
## 61 464402783 219.66 A 1
## 62 464402783 39.50 B 2
## 63 464409779 68.42 A 1
## 64 464409779 26.52 B 2
ggplot(df_complete, aes(x = Home, y = Score)) +
geom_boxplot(fill = "skyblue", alpha = 0.6) +
geom_jitter(width = 0.1, alpha = 0.5, color = "darkblue") +
labs(title = "Distribution of Scores by Home",
x = "Home",
y = "Total Duration (seconds)")

ggplot(df_complete, aes(x = Home, y = Score, fill = factor(Period))) +
geom_boxplot(alpha = 0.6, position = position_dodge(0.8)) +
labs(title = "Scores by Home and Period",
x = "Home",
y = "Total Duration (seconds)",
fill = "Period") +
theme_minimal()

means <- df_complete %>%
group_by(Home, Period) %>%
summarise(MeanScore = mean(Score), .groups = 'drop')
ggplot(means, aes(x = Home, y = MeanScore, group = Period, color = factor(Period))) +
geom_line(size = 1) +
geom_point(size = 3) +
labs(title = "Interaction Plot: Home × Period",
x = "Home",
y = "Mean Total Duration (seconds)",
color = "Period") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ggplot(df_complete, aes(x = Home, y = Score, group = Subject)) +
geom_line(alpha = 0.5) +
geom_point(size = 2, color = "red") +
labs(title = "Paired Scores per Subject",
x = "Home",
y = "Total Duration (seconds)")

ggplot(df_complete, aes(x = Score, fill = Home)) +
geom_histogram(position = "dodge", bins = 10, color = "black", alpha = 0.7) +
facet_wrap(~ Home) +
labs(title = "Histogram of Score by Home",
x = "Score",
y = "Count") +
theme_minimal()

# Create separate datasets for each condition
df_A1 <- df_complete %>% filter(Home == "A", Period == 1)
df_A2 <- df_complete %>% filter(Home == "A", Period == 2)
df_B1 <- df_complete %>% filter(Home == "B", Period == 1)
df_B2 <- df_complete %>% filter(Home == "B", Period == 2)
# Home A, Period 1
ggplot(df_A1, aes(x = Score)) +
geom_histogram(color = "black", fill = "steelblue", bins = 10, alpha = 0.7) +
labs(title = "Histogram: Home A, Period 1", x = "Score", y = "Count") +
theme_minimal()

# Home A, Period 2
ggplot(df_A2, aes(x = Score)) +
geom_histogram(color = "black", fill = "steelblue", bins = 10, alpha = 0.7) +
labs(title = "Histogram: Home A, Period 2", x = "Score", y = "Count") +
theme_minimal()

# Home B, Period 1
ggplot(df_B1, aes(x = Score)) +
geom_histogram(color = "black", fill = "salmon", bins = 10, alpha = 0.7) +
labs(title = "Histogram: Home B, Period 1", x = "Score", y = "Count") +
theme_minimal()

# Home B, Period 2
ggplot(df_B2, aes(x = Score)) +
geom_histogram(color = "black", fill = "salmon", bins = 10, alpha = 0.7) +
labs(title = "Histogram: Home B, Period 2", x = "Score", y = "Count") +
theme_minimal()

df_complete <- df_complete %>%
mutate(
Home = factor(Home),
Period = factor(Period)
)
anova_model <- aov(Score ~ Home + Period + Error(Subject/Home), data = df_complete)
summary(anova_model)
##
## Error: Subject
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 1 607.1 607.1
##
## Error: Subject:Home
## Df Sum Sq Mean Sq
## Home 1 6883 6883
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## Home 1 435 435 0.395 0.532
## Period 1 32952 32952 29.952 9.51e-07 ***
## Residuals 59 64911 1100
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary_home <- df %>%
group_by(Home) %>%
summarise(
N = n(),
Mean = mean(Score),
SD = sd(Score),
SE = SD / sqrt(N),
CI_lower = Mean - qt(0.975, df = N-1) * SE,
CI_upper = Mean + qt(0.975, df = N-1) * SE
)
summary_home
## # A tibble: 2 × 7
## Home N Mean SD SE CI_lower CI_upper
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A 36 68.3 47.4 7.90 52.3 84.4
## 2 B 32 53.1 31.3 5.54 41.8 64.4
summary_home_period <- df %>%
group_by(Home, Period) %>%
summarise(
N = n(),
Mean = mean(Score),
SD = sd(Score),
SE = SD / sqrt(N),
CI_lower = Mean - qt(0.975, df = N-1) * SE,
CI_upper = Mean + qt(0.975, df = N-1) * SE
)
## `summarise()` has grouped output by 'Home'. You can override using the
## `.groups` argument.
summary_home_period
## # A tibble: 4 × 8
## # Groups: Home [2]
## Home Period N Mean SD SE CI_lower CI_upper
## <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A 1 21 83.1 52.8 11.5 59.1 107.
## 2 A 2 15 47.6 29.0 7.50 31.5 63.7
## 3 B 1 15 74.9 33.4 8.63 56.4 93.4
## 4 B 2 17 33.9 9.43 2.29 29.1 38.8
# Pivot wide: one row per subject, columns for Home A and B
# Using paired difference
df_wide <- df_complete %>%
select(Subject, Home, Score) %>%
pivot_wider(names_from = Home, values_from = Score) %>%
drop_na(A, B) %>% # ensure fully paired
mutate(Diff = A - B) # compute Home A minus Home B
print(df_wide)
## # A tibble: 32 × 4
## Subject A B Diff
## <int> <dbl> <dbl> <dbl>
## 1 302888762 72.7 27.0 45.7
## 2 456103220 22.0 28.4 -6.38
## 3 457895107 55.0 41.4 13.6
## 4 460185122 30.6 68.1 -37.5
## 5 461180896 73.5 30.6 42.9
## 6 462498662 69.3 19.7 49.6
## 7 462862639 131. 27.4 103.
## 8 463230127 13.7 56.2 -42.5
## 9 463237021 119. 49.3 69.8
## 10 463256322 86.4 30.2 56.2
## # ℹ 22 more rows
# Paired two-tailed t-test
t_test_result <- t.test(df_wide$A, df_wide$B,
paired = TRUE, # paired because within-subject
var.equal = FALSE,
alternative = "two.sided")
t_test_result
##
## Paired t-test
##
## data: df_wide$A and df_wide$B
## t = 1.8694, df = 31, p-value = 0.07104
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.912665 43.952040
## sample estimates:
## mean difference
## 21.01969
N <- nrow(df_wide)
mean_diff <- mean(df_wide$Diff)
sd_diff <- sd(df_wide$Diff)
se_diff <- sd_diff / sqrt(N)
# 95% confidence interval
ci_lower <- mean_diff - qt(0.975, df=N-1) * se_diff
ci_upper <- mean_diff + qt(0.975, df=N-1) * se_diff
# Combine into a table
ci_table <- data.frame(
N = N,
Mean_Diff = mean_diff,
SD_Diff = sd_diff,
SE_Diff = se_diff,
CI_lower = ci_lower,
CI_upper = ci_upper
)
ci_table
## N Mean_Diff SD_Diff SE_Diff CI_lower CI_upper
## 1 32 21.01969 63.60585 11.24403 -1.912665 43.95204