Statistical Analysis

# Read one CSV export and tag every row with the home design and test period
# it belongs to.
load_condition <- function(path, home, period) {
  records <- read.csv(path)
  records[["Home"]] <- home
  records[["Period"]] <- period
  records
}

# Period 1 sessions
homeA_1st <- load_condition("HomeA-1st.csv", "A", 1)
homeB_1st <- load_condition("HomeB-1st.csv", "B", 1)

# Period 2 sessions
homeA_2nd <- load_condition("HomeA-2nd.csv", "A", 2)
homeB_2nd <- load_condition("HomeB-2nd.csv", "B", 2)

# Stack the four sessions, keep only the analysis columns (renaming the two
# raw export columns while selecting), drop exact duplicate rows, and order
# the result by subject.
df <- bind_rows(homeA_1st, homeB_1st, homeA_2nd, homeB_2nd) %>%
  select(
    Subject = Tester.ID,
    Score = Total.duration..seconds.,
    Home,
    Period
  ) %>%
  distinct() %>%
  arrange(Subject)

print(df)

##      Subject  Score Home Period
## 1  302888762  72.68    A      1
## 2  302888762  27.02    B      2
## 3  456103220  21.98    A      1
## 4  456103220  28.36    B      2
## 5  457895107  41.37    B      1
## 6  457895107  54.97    A      2
## 7  460185122  68.13    B      1
## 8  460185122  30.60    A      2
## 9  461180896  73.52    A      1
## 10 461180896  30.63    B      2
## 11 462498662  69.31    A      1
## 12 462498662  19.68    B      2
## 13 462862639 130.80    A      1
## 14 462862639  27.43    B      2
## 15 463230127  56.15    B      1
## 16 463230127  13.66    A      2
## 17 463237021 119.05    A      1
## 18 463237021  49.28    B      2
## 19 463256322  86.45    A      1
## 20 463256322  30.22    B      2
## 21 463510048 116.98    B      1
## 22 463510048  33.86    A      2
## 23 463682411 100.48    B      1
## 24 463682411  17.72    A      2
## 25 464022343 124.66    A      1
## 26 464022343  49.99    B      2
## 27 464101650  44.72    B      1
## 28 464101650  24.81    A      2
## 29 464168937 101.78    B      1
## 30 464168937  11.05    A      2
## 31 464176866  38.23    B      1
## 32 464176866  65.93    A      2
## 33 464184244  18.11    A      1
## 34 464199019 129.71    B      1
## 35 464199019  72.99    A      2
## 36 464204034  68.50    A      1
## 37 464204034  47.58    B      2
## 38 464220606  35.95    B      1
## 39 464220606  27.69    A      2
## 40 464243752 133.69    B      1
## 41 464243752  51.76    A      2
## 42 464279823  58.06    B      1
## 43 464279823  63.62    A      2
## 44 464283964  66.87    B      1
## 45 464283964 119.52    A      2
## 46 464289978 153.39    A      1
## 47 464289978  33.24    B      2
## 48 464350923  83.29    A      1
## 49 464350923  26.52    B      2
## 50 464351742   5.22    A      1
## 51 464367632  25.42    A      1
## 52 464370209 123.43    A      1
## 53 464370209  36.14    B      2
## 54 464372331  50.47    A      1
## 55 464372331  27.14    B      2
## 56 464376961  54.26    A      1
## 57 464376961  29.98    B      2
## 58 464378135 139.10    A      1
## 59 464378135  47.60    B      2
## 60 464386065  54.78    B      1
## 61 464386065  56.00    A      2
## 62 464386169  76.31    B      1
## 63 464386169  69.52    A      2
## 64 464402783 219.66    A      1
## 65 464402783  39.50    B      2
## 66 464409779  68.42    A      1
## 67 464409779  26.52    B      2
## 68 464610098  38.08    A      1

# Data cleaning: retain only subjects that have scores for both designs

## Subjects whose rows cover two distinct Home values (i.e. both A and B)
subjects_complete <- df %>%
  distinct(Subject, Home) %>%
  count(Subject, name = "n_homes") %>%
  filter(n_homes == 2) %>%
  pull(Subject)

## Restrict the main dataset to those subjects
df_complete <- filter(df, Subject %in% subjects_complete)

# Sanity check: number of subjects remaining after the filter
df_complete %>% pull(Subject) %>% n_distinct()

## [1] 32

# Show the cleaned data: only subjects with a score for both Home A and Home B
print(df_complete)

##      Subject  Score Home Period
## 1  302888762  72.68    A      1
## 2  302888762  27.02    B      2
## 3  456103220  21.98    A      1
## 4  456103220  28.36    B      2
## 5  457895107  41.37    B      1
## 6  457895107  54.97    A      2
## 7  460185122  68.13    B      1
## 8  460185122  30.60    A      2
## 9  461180896  73.52    A      1
## 10 461180896  30.63    B      2
## 11 462498662  69.31    A      1
## 12 462498662  19.68    B      2
## 13 462862639 130.80    A      1
## 14 462862639  27.43    B      2
## 15 463230127  56.15    B      1
## 16 463230127  13.66    A      2
## 17 463237021 119.05    A      1
## 18 463237021  49.28    B      2
## 19 463256322  86.45    A      1
## 20 463256322  30.22    B      2
## 21 463510048 116.98    B      1
## 22 463510048  33.86    A      2
## 23 463682411 100.48    B      1
## 24 463682411  17.72    A      2
## 25 464022343 124.66    A      1
## 26 464022343  49.99    B      2
## 27 464101650  44.72    B      1
## 28 464101650  24.81    A      2
## 29 464168937 101.78    B      1
## 30 464168937  11.05    A      2
## 31 464176866  38.23    B      1
## 32 464176866  65.93    A      2
## 33 464199019 129.71    B      1
## 34 464199019  72.99    A      2
## 35 464204034  68.50    A      1
## 36 464204034  47.58    B      2
## 37 464220606  35.95    B      1
## 38 464220606  27.69    A      2
## 39 464243752 133.69    B      1
## 40 464243752  51.76    A      2
## 41 464279823  58.06    B      1
## 42 464279823  63.62    A      2
## 43 464283964  66.87    B      1
## 44 464283964 119.52    A      2
## 45 464289978 153.39    A      1
## 46 464289978  33.24    B      2
## 47 464350923  83.29    A      1
## 48 464350923  26.52    B      2
## 49 464370209 123.43    A      1
## 50 464370209  36.14    B      2
## 51 464372331  50.47    A      1
## 52 464372331  27.14    B      2
## 53 464376961  54.26    A      1
## 54 464376961  29.98    B      2
## 55 464378135 139.10    A      1
## 56 464378135  47.60    B      2
## 57 464386065  54.78    B      1
## 58 464386065  56.00    A      2
## 59 464386169  76.31    B      1
## 60 464386169  69.52    A      2
## 61 464402783 219.66    A      1
## 62 464402783  39.50    B      2
## 63 464409779  68.42    A      1
## 64 464409779  26.52    B      2

# Box plot of scores for each home, with the individual observations
# jittered on top of the boxes.
df_complete %>%
  ggplot(aes(Home, Score)) +
  geom_boxplot(alpha = 0.6, fill = "skyblue") +
  geom_jitter(color = "darkblue", alpha = 0.5, width = 0.1) +
  labs(
    y = "Total Duration (seconds)",
    x = "Home",
    title = "Distribution of Scores by Home"
  )

# Box plots of scores per home, split side by side by test period.
df_complete %>%
  ggplot(aes(Home, Score, fill = factor(Period))) +
  geom_boxplot(position = position_dodge(0.8), alpha = 0.6) +
  labs(
    fill = "Period",
    y = "Total Duration (seconds)",
    x = "Home",
    title = "Scores by Home and Period"
  ) +
  theme_minimal()

# Mean score for every Home x Period cell.
means <- df_complete %>%
  group_by(Home, Period) %>%
  summarise(MeanScore = mean(Score), .groups = "drop")

# Interaction plot: one line per Period connecting the cell means of
# Home A and Home B.
ggplot(means, aes(x = Home, y = MeanScore, group = Period, color = factor(Period))) +
  # `linewidth` replaces the `size` aesthetic for lines, which was deprecated
  # in ggplot2 3.4.0 and triggers a lifecycle warning.
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(title = "Interaction Plot: Home × Period",
       x = "Home",
       y = "Mean Total Duration (seconds)",
       color = "Period") +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Paired plot: each subject's two scores joined by a line across the homes.
df_complete %>%
  ggplot(aes(Home, Score, group = Subject)) +
  geom_line(alpha = 0.5) +
  geom_point(color = "red", size = 2) +
  labs(
    y = "Total Duration (seconds)",
    x = "Home",
    title = "Paired Scores per Subject"
  )

# Score histograms, one facet per home, filled by home.
df_complete %>%
  ggplot(aes(Score, fill = Home)) +
  geom_histogram(bins = 10, position = "dodge", alpha = 0.7, color = "black") +
  facet_wrap(vars(Home)) +
  labs(
    y = "Count",
    x = "Score",
    title = "Histogram of Score by Home"
  ) +
  theme_minimal()

# One data set per Home x Period condition
df_A1 <- filter(df_complete, Home == "A" & Period == 1)
df_A2 <- filter(df_complete, Home == "A" & Period == 2)
df_B1 <- filter(df_complete, Home == "B" & Period == 1)
df_B2 <- filter(df_complete, Home == "B" & Period == 2)

# Draw a 10-bin score histogram for one condition's data set, using the
# given bar fill colour and plot title.
plot_score_histogram <- function(data, bar_fill, plot_title) {
  ggplot(data, aes(x = Score)) +
    geom_histogram(bins = 10, fill = bar_fill, color = "black", alpha = 0.7) +
    labs(title = plot_title, x = "Score", y = "Count") +
    theme_minimal()
}

# Home A, Period 1
plot_score_histogram(df_A1, "steelblue", "Histogram: Home A, Period 1")

# Home A, Period 2
plot_score_histogram(df_A2, "steelblue", "Histogram: Home A, Period 2")

# Home B, Period 1
plot_score_histogram(df_B1, "salmon", "Histogram: Home B, Period 1")

# Home B, Period 2
plot_score_histogram(df_B2, "salmon", "Histogram: Home B, Period 2")

# Reshape to one row per subject with the Home A and Home B scores side by
# side (Period is dropped before pivoting so it does not split the rows).
df_wide <- pivot_wider(
  df_complete[, c("Subject", "Home", "Score")],
  names_from = Home,
  values_from = Score
)

print(df_wide)

## # A tibble: 32 × 3
##      Subject     A     B
##        <int> <dbl> <dbl>
##  1 302888762  72.7  27.0
##  2 456103220  22.0  28.4
##  3 457895107  55.0  41.4
##  4 460185122  30.6  68.1
##  5 461180896  73.5  30.6
##  6 462498662  69.3  19.7
##  7 462862639 131.   27.4
##  8 463230127  13.7  56.2
##  9 463237021 119.   49.3
## 10 463256322  86.4  30.2
## # ℹ 22 more rows

# Two-sided paired Wilcoxon signed-rank test comparing each subject's Home A
# score with the same subject's Home B score (columns A and B of df_wide).
# NOTE(review): the comparison pairs on Home only and does not account for
# Period (which home a subject saw first) — confirm that is intended.
wilcox_test <- wilcox.test(df_wide$A, df_wide$B, paired = TRUE, alternative = "two.sided")
print(wilcox_test)

## 
##  Wilcoxon signed rank exact test
## 
## data:  df_wide$A and df_wide$B
## V = 360, p-value = 0.07388
## alternative hypothesis: true location shift is not equal to 0

# Median score under each home design (missing values ignored)
summarise(
  df_wide,
  median_A = median(A, na.rm = TRUE),
  median_B = median(B, na.rm = TRUE)
)

## # A tibble: 1 × 2
##   median_A median_B
##      <dbl>    <dbl>
## 1     68.5     43.0

Statistical Analysis - Time Taken

2025-11-05