Statistical Analysis

# Read the four CSV files
homeA_1st <- read.csv("HomeA-1st.csv")
homeB_1st <- read.csv("HomeB-1st.csv")
homeA_2nd <- read.csv("HomeA-2nd.csv")
homeB_2nd <- read.csv("HomeB-2nd.csv")

# Add Home and Period columns
homeA_1st$Home <- "A"
homeA_1st$Period <- 1

homeB_1st$Home <- "B"
homeB_1st$Period <- 1

homeA_2nd$Home <- "A"
homeA_2nd$Period <- 2

homeB_2nd$Home <- "B"
homeB_2nd$Period <- 2

df <- bind_rows(homeA_1st, homeB_1st, homeA_2nd, homeB_2nd) %>%
  select(Tester.ID, Total.duration..seconds., Home, Period) %>%
  distinct() %>%
  rename(Subject = Tester.ID, Score = Total.duration..seconds.)

df <- arrange(df, Subject)
print(df)

##      Subject  Score Home Period
## 1  302888762  72.68    A      1
## 2  302888762  27.02    B      2
## 3  456103220  21.98    A      1
## 4  456103220  28.36    B      2
## 5  457895107  41.37    B      1
## 6  457895107  54.97    A      2
## 7  460185122  68.13    B      1
## 8  460185122  30.60    A      2
## 9  461180896  73.52    A      1
## 10 461180896  30.63    B      2
## 11 462498662  69.31    A      1
## 12 462498662  19.68    B      2
## 13 462862639 130.80    A      1
## 14 462862639  27.43    B      2
## 15 463230127  56.15    B      1
## 16 463230127  13.66    A      2
## 17 463237021 119.05    A      1
## 18 463237021  49.28    B      2
## 19 463256322  86.45    A      1
## 20 463256322  30.22    B      2
## 21 463510048 116.98    B      1
## 22 463510048  33.86    A      2
## 23 463682411 100.48    B      1
## 24 463682411  17.72    A      2
## 25 464022343 124.66    A      1
## 26 464022343  49.99    B      2
## 27 464101650  44.72    B      1
## 28 464101650  24.81    A      2
## 29 464168937 101.78    B      1
## 30 464168937  11.05    A      2
## 31 464176866  38.23    B      1
## 32 464176866  65.93    A      2
## 33 464184244  18.11    A      1
## 34 464199019 129.71    B      1
## 35 464199019  72.99    A      2
## 36 464204034  68.50    A      1
## 37 464204034  47.58    B      2
## 38 464220606  35.95    B      1
## 39 464220606  27.69    A      2
## 40 464243752 133.69    B      1
## 41 464243752  51.76    A      2
## 42 464279823  58.06    B      1
## 43 464279823  63.62    A      2
## 44 464283964  66.87    B      1
## 45 464283964 119.52    A      2
## 46 464289978 153.39    A      1
## 47 464289978  33.24    B      2
## 48 464350923  83.29    A      1
## 49 464350923  26.52    B      2
## 50 464351742   5.22    A      1
## 51 464367632  25.42    A      1
## 52 464370209 123.43    A      1
## 53 464370209  36.14    B      2
## 54 464372331  50.47    A      1
## 55 464372331  27.14    B      2
## 56 464376961  54.26    A      1
## 57 464376961  29.98    B      2
## 58 464378135 139.10    A      1
## 59 464378135  47.60    B      2
## 60 464386065  54.78    B      1
## 61 464386065  56.00    A      2
## 62 464386169  76.31    B      1
## 63 464386169  69.52    A      2
## 64 464402783 219.66    A      1
## 65 464402783  39.50    B      2
## 66 464409779  68.42    A      1
## 67 464409779  26.52    B      2
## 68 464610098  38.08    A      1

# Data cleaning, keeping subjects that completed both designs

## Count how many unique Homes each subject has
subjects_complete <- df %>%
  group_by(Subject) %>%
  summarise(n_homes = n_distinct(Home)) %>%
  filter(n_homes == 2) %>%   # only subjects with both A and B
  pull(Subject)

## Filter the main dataset
df_complete <- df %>% filter(Subject %in% subjects_complete)

n_distinct(df_complete$Subject)  # check how many subjects remain

## [1] 32

print(df_complete)

##      Subject  Score Home Period
## 1  302888762  72.68    A      1
## 2  302888762  27.02    B      2
## 3  456103220  21.98    A      1
## 4  456103220  28.36    B      2
## 5  457895107  41.37    B      1
## 6  457895107  54.97    A      2
## 7  460185122  68.13    B      1
## 8  460185122  30.60    A      2
## 9  461180896  73.52    A      1
## 10 461180896  30.63    B      2
## 11 462498662  69.31    A      1
## 12 462498662  19.68    B      2
## 13 462862639 130.80    A      1
## 14 462862639  27.43    B      2
## 15 463230127  56.15    B      1
## 16 463230127  13.66    A      2
## 17 463237021 119.05    A      1
## 18 463237021  49.28    B      2
## 19 463256322  86.45    A      1
## 20 463256322  30.22    B      2
## 21 463510048 116.98    B      1
## 22 463510048  33.86    A      2
## 23 463682411 100.48    B      1
## 24 463682411  17.72    A      2
## 25 464022343 124.66    A      1
## 26 464022343  49.99    B      2
## 27 464101650  44.72    B      1
## 28 464101650  24.81    A      2
## 29 464168937 101.78    B      1
## 30 464168937  11.05    A      2
## 31 464176866  38.23    B      1
## 32 464176866  65.93    A      2
## 33 464199019 129.71    B      1
## 34 464199019  72.99    A      2
## 35 464204034  68.50    A      1
## 36 464204034  47.58    B      2
## 37 464220606  35.95    B      1
## 38 464220606  27.69    A      2
## 39 464243752 133.69    B      1
## 40 464243752  51.76    A      2
## 41 464279823  58.06    B      1
## 42 464279823  63.62    A      2
## 43 464283964  66.87    B      1
## 44 464283964 119.52    A      2
## 45 464289978 153.39    A      1
## 46 464289978  33.24    B      2
## 47 464350923  83.29    A      1
## 48 464350923  26.52    B      2
## 49 464370209 123.43    A      1
## 50 464370209  36.14    B      2
## 51 464372331  50.47    A      1
## 52 464372331  27.14    B      2
## 53 464376961  54.26    A      1
## 54 464376961  29.98    B      2
## 55 464378135 139.10    A      1
## 56 464378135  47.60    B      2
## 57 464386065  54.78    B      1
## 58 464386065  56.00    A      2
## 59 464386169  76.31    B      1
## 60 464386169  69.52    A      2
## 61 464402783 219.66    A      1
## 62 464402783  39.50    B      2
## 63 464409779  68.42    A      1
## 64 464409779  26.52    B      2

ggplot(df_complete, aes(x = Home, y = Score)) +
  geom_boxplot(fill = "skyblue", alpha = 0.6) +
  geom_jitter(width = 0.1, alpha = 0.5, color = "darkblue") +
  labs(title = "Distribution of Scores by Home",
       x = "Home",
       y = "Total Duration (seconds)")

ggplot(df_complete, aes(x = Home, y = Score, fill = factor(Period))) +
  geom_boxplot(alpha = 0.6, position = position_dodge(0.8)) +
  labs(title = "Scores by Home and Period",
       x = "Home",
       y = "Total Duration (seconds)",
       fill = "Period") +
  theme_minimal()

means <- df_complete %>%
  group_by(Home, Period) %>%
  summarise(MeanScore = mean(Score), .groups = 'drop')

ggplot(means, aes(x = Home, y = MeanScore, group = Period, color = factor(Period))) +
  geom_line(size = 1) +
  geom_point(size = 3) +
  labs(title = "Interaction Plot: Home × Period",
       x = "Home",
       y = "Mean Total Duration (seconds)",
       color = "Period") +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ggplot(df_complete, aes(x = Home, y = Score, group = Subject)) +
  geom_line(alpha = 0.5) +
  geom_point(size = 2, color = "red") +
  labs(title = "Paired Scores per Subject",
       x = "Home",
       y = "Total Duration (seconds)")

ggplot(df_complete, aes(x = Score, fill = Home)) +
  geom_histogram(position = "dodge", bins = 10, color = "black", alpha = 0.7) +
  facet_wrap(~ Home) +
  labs(title = "Histogram of Score by Home",
       x = "Score",
       y = "Count") +
  theme_minimal()

# Create separate datasets for each condition
df_A1 <- df_complete %>% filter(Home == "A", Period == 1)
df_A2 <- df_complete %>% filter(Home == "A", Period == 2)
df_B1 <- df_complete %>% filter(Home == "B", Period == 1)
df_B2 <- df_complete %>% filter(Home == "B", Period == 2)

# Home A, Period 1
ggplot(df_A1, aes(x = Score)) +
  geom_histogram(color = "black", fill = "steelblue", bins = 10, alpha = 0.7) +
  labs(title = "Histogram: Home A, Period 1", x = "Score", y = "Count") +
  theme_minimal()

# Home A, Period 2
ggplot(df_A2, aes(x = Score)) +
  geom_histogram(color = "black", fill = "steelblue", bins = 10, alpha = 0.7) +
  labs(title = "Histogram: Home A, Period 2", x = "Score", y = "Count") +
  theme_minimal()

# Home B, Period 1
ggplot(df_B1, aes(x = Score)) +
  geom_histogram(color = "black", fill = "salmon", bins = 10, alpha = 0.7) +
  labs(title = "Histogram: Home B, Period 1", x = "Score", y = "Count") +
  theme_minimal()

# Home B, Period 2
ggplot(df_B2, aes(x = Score)) +
  geom_histogram(color = "black", fill = "salmon", bins = 10, alpha = 0.7) +
  labs(title = "Histogram: Home B, Period 2", x = "Score", y = "Count") +
  theme_minimal()

df_complete <- df_complete %>%
  mutate(
    Home = factor(Home),
    Period = factor(Period)
  )

anova_model <- aov(Score ~ Home + Period + Error(Subject/Home), data = df_complete)
summary(anova_model)

## 
## Error: Subject
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals  1  607.1   607.1               
## 
## Error: Subject:Home
##      Df Sum Sq Mean Sq
## Home  1   6883    6883
## 
## Error: Within
##           Df Sum Sq Mean Sq F value   Pr(>F)    
## Home       1    435     435   0.395    0.532    
## Period     1  32952   32952  29.952 9.51e-07 ***
## Residuals 59  64911    1100                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary_home <- df %>%
  group_by(Home) %>%
  summarise(
    N = n(),
    Mean = mean(Score),
    SD = sd(Score),
    SE = SD / sqrt(N),
    CI_lower = Mean - qt(0.975, df = N-1) * SE,
    CI_upper = Mean + qt(0.975, df = N-1) * SE
  )

summary_home

## # A tibble: 2 × 7
##   Home      N  Mean    SD    SE CI_lower CI_upper
##   <chr> <int> <dbl> <dbl> <dbl>    <dbl>    <dbl>
## 1 A        36  68.3  47.4  7.90     52.3     84.4
## 2 B        32  53.1  31.3  5.54     41.8     64.4

summary_home_period <- df %>%
  group_by(Home, Period) %>%
  summarise(
    N = n(),
    Mean = mean(Score),
    SD = sd(Score),
    SE = SD / sqrt(N),
    CI_lower = Mean - qt(0.975, df = N-1) * SE,
    CI_upper = Mean + qt(0.975, df = N-1) * SE
  )

## `summarise()` has grouped output by 'Home'. You can override using the
## `.groups` argument.

summary_home_period

## # A tibble: 4 × 8
## # Groups:   Home [2]
##   Home  Period     N  Mean    SD    SE CI_lower CI_upper
##   <chr>  <dbl> <int> <dbl> <dbl> <dbl>    <dbl>    <dbl>
## 1 A          1    21  83.1 52.8  11.5      59.1    107. 
## 2 A          2    15  47.6 29.0   7.50     31.5     63.7
## 3 B          1    15  74.9 33.4   8.63     56.4     93.4
## 4 B          2    17  33.9  9.43  2.29     29.1     38.8

# Pivot wide: one row per subject, columns for Home A and B
# Using paired difference
df_wide <- df_complete %>%
  select(Subject, Home, Score) %>%
  pivot_wider(names_from = Home, values_from = Score) %>%
  drop_na(A, B) %>%   # ensure fully paired
  mutate(Diff = A - B)  # compute Home A minus Home B

print(df_wide)

## # A tibble: 32 × 4
##      Subject     A     B   Diff
##        <int> <dbl> <dbl>  <dbl>
##  1 302888762  72.7  27.0  45.7 
##  2 456103220  22.0  28.4  -6.38
##  3 457895107  55.0  41.4  13.6 
##  4 460185122  30.6  68.1 -37.5 
##  5 461180896  73.5  30.6  42.9 
##  6 462498662  69.3  19.7  49.6 
##  7 462862639 131.   27.4 103.  
##  8 463230127  13.7  56.2 -42.5 
##  9 463237021 119.   49.3  69.8 
## 10 463256322  86.4  30.2  56.2 
## # ℹ 22 more rows

# Paired two-tailed t-test
t_test_result <- t.test(df_wide$A, df_wide$B, 
                        paired = TRUE,   # paired because within-subject
                        var.equal = FALSE,
                        alternative = "two.sided")

t_test_result

## 
##  Paired t-test
## 
## data:  df_wide$A and df_wide$B
## t = 1.8694, df = 31, p-value = 0.07104
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.912665 43.952040
## sample estimates:
## mean difference 
##        21.01969

N <- nrow(df_wide)
mean_diff <- mean(df_wide$Diff)
sd_diff <- sd(df_wide$Diff)
se_diff <- sd_diff / sqrt(N)

# 95% confidence interval
ci_lower <- mean_diff - qt(0.975, df=N-1) * se_diff
ci_upper <- mean_diff + qt(0.975, df=N-1) * se_diff

# Combine into a table
ci_table <- data.frame(
  N = N,
  Mean_Diff = mean_diff,
  SD_Diff = sd_diff,
  SE_Diff = se_diff,
  CI_lower = ci_lower,
  CI_upper = ci_upper
)

ci_table

##    N Mean_Diff  SD_Diff  SE_Diff  CI_lower CI_upper
## 1 32  21.01969 63.60585 11.24403 -1.912665 43.95204

Statistical Analysis

2025-11-05