library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
# Read the 4 CSV files
collapsible_1 <- read.csv("Collapsible-1.csv")
collapsible_2 <- read.csv("Collapsible-2.csv")
noncollapsible_1 <- read.csv("Noncollapsible-1.csv")
noncollapsible_2 <- read.csv("Noncollapsible-2.csv")
# Add Home and Period columns
collapsible_1$Version <- "B"
collapsible_1$Period <- 1
collapsible_2$Version <- "B"
collapsible_2$Period <- 2
noncollapsible_1$Version <- "A"
noncollapsible_1$Period <- 1
noncollapsible_2$Version <- "A"
noncollapsible_2$Period <- 2
df <- bind_rows(collapsible_1,collapsible_2,noncollapsible_1,noncollapsible_2) %>%
select(Tester.ID, Value, Version, Period) %>%
distinct() %>%
rename(Subject = Tester.ID, Score = Value)
print(df)
## Subject Score Version Period
## 1 457895107 5 B 1
## 2 464101650 7 B 1
## 3 464168937 6 B 1
## 4 464176866 9 B 1
## 5 460185122 6 B 1
## 6 464199019 8 B 1
## 7 464243752 8 B 1
## 8 463230127 5 B 1
## 9 464279823 3 B 1
## 10 464283964 3 B 1
## 11 463682411 10 B 1
## 12 464386065 10 B 1
## 13 464386169 6 B 1
## 14 463510048 8 B 1
## 15 456103220 6 B 2
## 16 464022343 10 B 2
## 17 464204034 6 B 2
## 18 464289978 7 B 2
## 19 462862639 2 B 2
## 20 302888762 4 B 2
## 21 462498662 7 B 2
## 22 464350923 9 B 2
## 23 464370209 5 B 2
## 24 464372331 8 B 2
## 25 463237021 7 B 2
## 26 464376961 8 B 2
## 27 464378135 4 B 2
## 28 464402783 6 B 2
## 29 464409779 10 B 2
## 30 463256322 8 B 2
## 31 461180896 8 B 2
## 32 456103220 9 A 1
## 33 464022343 10 A 1
## 34 464204034 6 A 1
## 35 464289978 4 A 1
## 36 462862639 9 A 1
## 37 302888762 4 A 1
## 38 462498662 7 A 1
## 39 464350923 9 A 1
## 40 464370209 10 A 1
## 41 464372331 4 A 1
## 42 463237021 9 A 1
## 43 464376961 8 A 1
## 44 464378135 8 A 1
## 45 464402783 8 A 1
## 46 464409779 10 A 1
## 47 463256322 10 A 1
## 48 461180896 8 A 1
## 49 457895107 10 A 2
## 50 464101650 8 A 2
## 51 464168937 6 A 2
## 52 464176866 8 A 2
## 53 460185122 6 A 2
## 54 464199019 10 A 2
## 55 464243752 8 A 2
## 56 463230127 4 A 2
## 57 464279823 8 A 2
## 58 464283964 3 A 2
## 59 463682411 10 A 2
## 60 464386065 10 A 2
## 61 464386169 10 A 2
## 62 463510048 9 A 2
# Data cleaning, keeping subjects that completed both rating questionnaires
## Count how many unique Version each subject has
subjects_complete <- df %>%
group_by(Subject) %>%
summarise(n_version = n_distinct(Version)) %>%
filter(n_version == 2) %>% # only subjects who did both questionnaires
pull(Subject)
## Filter the main dataset
df_complete <- df %>% filter(Subject %in% subjects_complete)
n_distinct(df_complete$Subject) # check how many subjects remain
## [1] 31
print(df_complete)
## Subject Score Version Period
## 1 457895107 5 B 1
## 2 464101650 7 B 1
## 3 464168937 6 B 1
## 4 464176866 9 B 1
## 5 460185122 6 B 1
## 6 464199019 8 B 1
## 7 464243752 8 B 1
## 8 463230127 5 B 1
## 9 464279823 3 B 1
## 10 464283964 3 B 1
## 11 463682411 10 B 1
## 12 464386065 10 B 1
## 13 464386169 6 B 1
## 14 463510048 8 B 1
## 15 456103220 6 B 2
## 16 464022343 10 B 2
## 17 464204034 6 B 2
## 18 464289978 7 B 2
## 19 462862639 2 B 2
## 20 302888762 4 B 2
## 21 462498662 7 B 2
## 22 464350923 9 B 2
## 23 464370209 5 B 2
## 24 464372331 8 B 2
## 25 463237021 7 B 2
## 26 464376961 8 B 2
## 27 464378135 4 B 2
## 28 464402783 6 B 2
## 29 464409779 10 B 2
## 30 463256322 8 B 2
## 31 461180896 8 B 2
## 32 456103220 9 A 1
## 33 464022343 10 A 1
## 34 464204034 6 A 1
## 35 464289978 4 A 1
## 36 462862639 9 A 1
## 37 302888762 4 A 1
## 38 462498662 7 A 1
## 39 464350923 9 A 1
## 40 464370209 10 A 1
## 41 464372331 4 A 1
## 42 463237021 9 A 1
## 43 464376961 8 A 1
## 44 464378135 8 A 1
## 45 464402783 8 A 1
## 46 464409779 10 A 1
## 47 463256322 10 A 1
## 48 461180896 8 A 1
## 49 457895107 10 A 2
## 50 464101650 8 A 2
## 51 464168937 6 A 2
## 52 464176866 8 A 2
## 53 460185122 6 A 2
## 54 464199019 10 A 2
## 55 464243752 8 A 2
## 56 463230127 4 A 2
## 57 464279823 8 A 2
## 58 464283964 3 A 2
## 59 463682411 10 A 2
## 60 464386065 10 A 2
## 61 464386169 10 A 2
## 62 463510048 9 A 2
ggplot(df_complete, aes(x = Version, y = Score, fill = factor(Period))) +
geom_boxplot(position = position_dodge(width = 0.8)) +
labs(
title = "Scores by Version and Period",
x = "Version",
y = "Score",
fill = "Period"
) +
theme_minimal()

ggplot(df_complete, aes(x = Score, fill = Version)) +
geom_histogram(position = "identity", alpha = 0.6, bins = 10, color = "black") +
facet_wrap(~Version) +
labs(
title = "Distribution of Scores by Version",
x = "Score",
y = "Count"
) +
theme_minimal()

df_complete %>%
group_by(Version) %>%
summarise(
n = n(),
mean_score = mean(Score),
sd_score = sd(Score),
median_score = median(Score),
min_score = min(Score),
max_score = max(Score),
.groups = "drop"
)
## # A tibble: 2 × 7
## Version n mean_score sd_score median_score min_score max_score
## <chr> <int> <dbl> <dbl> <int> <int> <int>
## 1 A 31 7.84 2.18 8 3 10
## 2 B 31 6.74 2.18 7 2 10
df_wide <- df_complete %>%
select(Subject, Version, Score) %>%
pivot_wider(names_from = Version, values_from = Score)
print(df_wide)
## # A tibble: 31 × 3
## Subject B A
## <int> <int> <int>
## 1 457895107 5 10
## 2 464101650 7 8
## 3 464168937 6 6
## 4 464176866 9 8
## 5 460185122 6 6
## 6 464199019 8 10
## 7 464243752 8 8
## 8 463230127 5 4
## 9 464279823 3 8
## 10 464283964 3 3
## # ℹ 21 more rows
# Run Wilcoxon signed-rank test
wilcox.test(df_wide$A, df_wide$B, paired = TRUE, exact=TRUE)
## Warning in wilcox.test.default(df_wide$A, df_wide$B, paired = TRUE, exact =
## TRUE): cannot compute exact p-value with ties
## Warning in wilcox.test.default(df_wide$A, df_wide$B, paired = TRUE, exact =
## TRUE): cannot compute exact p-value with zeroes
##
## Wilcoxon signed rank test with continuity correction
##
## data: df_wide$A and df_wide$B
## V = 126.5, p-value = 0.01863
## alternative hypothesis: true location shift is not equal to 0
df_wide %>%
summarise(
median_A = median(A, na.rm = TRUE),
median_B = median(B, na.rm = TRUE)
)
## # A tibble: 1 × 2
## median_A median_B
## <int> <int>
## 1 8 7