library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrr)
library(broom)
library(dplyr)
library(skimr)
library(DT)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
# Load the two input files: the game survey export and the sheet holding the
# additional per-participant variables.
file1 <- read.csv("gamex_data.csv")
file2 <- read.csv("Adding-Variables.csv")
# Give the key column the same name ('participant') in both tables so they
# can be joined on it.
file1 <- file1 %>%
  rename(participant = What.is.your.participant.code.)
file2 <- file2 %>%
  rename(participant = subject)
# Quick check to make sure 'participant' column exists and looks consistent
head(file1$participant)
## [1] "PI1" "PI10" "PI11" "PI12" "PI13" "PI14"
head(file2$participant)
## [1] "PI1" "PI10" "PI11" "PI12" "PI13" "PI14"
# Merge the data on the 'participant' column.
# relationship = "one-to-one" makes the join stop with an error if a
# participant code appears more than once in either file. Without it,
# duplicated codes would silently multiply rows and inflate the sample used
# by every downstream summary and model.
merged_data <- left_join(
  file1,
  file2,
  by = "participant",
  relationship = "one-to-one"
)
# Optional: Save the merged data to a new CSV
write.csv(merged_data, "merged_game_data.csv", row.names = FALSE)
# View a quick summary
head(merged_data)
## participant Group Pre...EngagementScore Pre...DifficultyScore
## 1 PI1 I 29 25
## 2 PI10 I 34 24
## 3 PI11 I 34 24
## 4 PI12 I 29 26
## 5 PI13 I 31 25
## 6 PI14 I 33 25
## Pre...CuriosityScore Pre...LevelDifficultyScore Pre...EnjoymentScore
## 1 45 16 31
## 2 35 15 33
## 3 42 15 40
## 4 34 11 25
## 5 39 15 31
## 6 40 21 35
## Post...EngagementScore Post...DifficultyScore Post...CuriosityScore
## 1 29 23 44
## 2 24 28 34
## 3 34 25 42
## 4 37 26 32
## 5 37 26 38
## 6 35 23 38
## Post...LevelDifficultyScore Post...EnjoymentScore D...EngagementScore
## 1 46 31 0
## 2 5 22 -10
## 3 30 39 0
## 4 33 35 8
## 5 40 36 6
## 6 37 37 2
## D...DifficultyScore D...CuriosityScore D...LevelDifficultyScore D...Enjoyment
## 1 -2 -1 30 0
## 2 4 -1 -10 -11
## 3 1 0 15 -1
## 4 0 -2 22 10
## 5 1 -1 25 5
## 6 -2 -2 16 2
## Game.Experience GameX.Split Duration..in.seconds. Age
## 1 3120 High 388 20.85
## 2 3328 High 230 20.60
## 3 2080 Low 386 18.60
## 4 4732 High 520 20.43
## 5 5200 High 355 19.27
## 6 29120 High 579 19.85
## Gender.Identity...Cleaned Ethnic.identity...Cleaned Language.1
## 1 Female Hispanic English
## 2 Female White English
## 3 Female Asian English
## 4 Female Black English
## 5 Non-binary White English
## 6 Non-binary White English
# Clean the merged table in a single pipeline:
#   1. shorten the verbose survey column names (Age and Group already have
#      their final names, so they need no rename),
#   2. drop rows where gender is missing,
#   3. store the categorical columns as factors and age as numeric,
#   4. lower-case every column name,
#   5. call the participant key 'subject'.
merged_data <- merged_data %>%
  rename(
    Gender = Gender.Identity...Cleaned,
    Ethnicity = Ethnic.identity...Cleaned,
    Duration = Duration..in.seconds.
  ) %>%
  filter(!is.na(Gender)) %>%
  mutate(
    across(c(Group, Gender, Ethnicity), as.factor),
    Age = as.numeric(Age)
  ) %>%
  rename_with(tolower) %>%
  rename(subject = participant)

# Reshape to long format: every 'pre...<construct>score' and
# 'post...<construct>score' column becomes rows identified by 'time' and
# 'construct', with the value in 'score'.
long_data.check <- pivot_longer(
  merged_data,
  cols = c(starts_with("pre"), starts_with("post")),
  names_to = c("time", "construct"),
  # first capture group -> time, second -> construct (the literal "..." and
  # the trailing "score" are discarded)
  names_pattern = "(pre|post)\\.\\.\\.(.*)score",
  values_to = "score"
)
# Order the time factor so that 'pre' is the reference level.
long_data.check <- long_data.check %>%
  mutate(time = factor(time, levels = c("pre", "post")))
# Descriptive statistics for the five post-test scores and the survey
# duration, computed separately within each gender.
skim_summary <- merged_data %>%
  group_by(gender) %>%
  skim(
    post...enjoymentscore,
    post...curiosityscore,
    post...difficultyscore,
    post...engagementscore,
    post...leveldifficultyscore,
    duration
  )
# Number of rows (participants) in each gender group
group_counts <- merged_data %>%
  group_by(gender) %>%
  tally()
# Attach the group size to every skim row as column 'n'
skim_summary_with_counts <- left_join(skim_summary, group_counts, by = "gender")
# Print the combined table
skim_summary_with_counts
| Name | Piped data |
| Number of rows | 120 |
| Number of columns | 24 |
| _______________________ | |
| Column type frequency: | |
| numeric | 6 |
| ________________________ | |
| Group variables | gender |
Variable type: numeric
| skim_variable | gender | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist | n |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| post…enjoymentscore | Female | 0 | 1 | 31.14 | 4.84 | 21 | 28.0 | 32.0 | 35.00 | 39 | ▃▃▅▇▅ | 49 |
| post…enjoymentscore | Male | 0 | 1 | 32.66 | 4.25 | 19 | 31.0 | 33.0 | 35.75 | 39 | ▁▂▃▇▅ | 62 |
| post…enjoymentscore | Non-binary | 0 | 1 | 36.22 | 2.17 | 32 | 35.0 | 36.0 | 37.00 | 39 | ▂▁▇▃▃ | 9 |
| post…curiosityscore | Female | 0 | 1 | 37.82 | 4.04 | 30 | 35.0 | 37.0 | 41.00 | 45 | ▅▇▆▃▅ | 49 |
| post…curiosityscore | Male | 0 | 1 | 39.76 | 3.29 | 34 | 37.0 | 40.0 | 42.00 | 45 | ▇▅▆▆▇ | 62 |
| post…curiosityscore | Non-binary | 0 | 1 | 41.44 | 2.46 | 38 | 40.0 | 42.0 | 43.00 | 45 | ▅▂▇▂▅ | 9 |
| post…difficultyscore | Female | 0 | 1 | 25.43 | 1.71 | 22 | 24.0 | 26.0 | 26.00 | 29 | ▂▃▇▃▂ | 49 |
| post…difficultyscore | Male | 0 | 1 | 24.34 | 2.48 | 17 | 23.0 | 25.0 | 26.00 | 29 | ▁▂▅▇▂ | 62 |
| post…difficultyscore | Non-binary | 0 | 1 | 24.56 | 1.74 | 22 | 23.0 | 24.0 | 26.00 | 27 | ▇▅▁▇▂ | 9 |
| post…engagementscore | Female | 0 | 1 | 33.10 | 4.57 | 19 | 30.0 | 34.0 | 36.00 | 43 | ▁▂▅▇▁ | 49 |
| post…engagementscore | Male | 0 | 1 | 33.19 | 4.18 | 22 | 31.0 | 33.0 | 36.75 | 41 | ▁▃▇▇▃ | 62 |
| post…engagementscore | Non-binary | 0 | 1 | 34.00 | 2.06 | 31 | 32.0 | 35.0 | 35.00 | 37 | ▇▂▁▇▅ | 9 |
| post…leveldifficultyscore | Female | 0 | 1 | 32.61 | 13.98 | 0 | 25.0 | 36.0 | 43.00 | 64 | ▃▃▇▇▂ | 49 |
| post…leveldifficultyscore | Male | 0 | 1 | 29.27 | 12.69 | 0 | 23.0 | 33.0 | 38.00 | 49 | ▂▂▇▇▅ | 62 |
| post…leveldifficultyscore | Non-binary | 0 | 1 | 27.33 | 7.62 | 16 | 23.0 | 26.0 | 31.00 | 40 | ▂▇▅▂▅ | 9 |
| duration | Female | 0 | 1 | 428.18 | 254.92 | 153 | 259.0 | 360.0 | 504.00 | 1415 | ▇▃▁▁▁ | 49 |
| duration | Male | 0 | 1 | 419.90 | 203.39 | 135 | 264.5 | 360.5 | 526.00 | 1123 | ▇▆▃▁▁ | 62 |
| duration | Non-binary | 0 | 1 | 522.22 | 264.53 | 199 | 355.0 | 550.0 | 619.00 | 1088 | ▇▂▆▁▂ | 9 |
# Box plot of survey duration over all participants. The constant empty
# string on the x axis collapses the data into a single box.
merged_data %>%
  ggplot(aes(x = "", y = duration)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  ggtitle("Boxplot of Duration") +
  xlab("") +
  ylab("Duration (seconds)") +
  theme_minimal()
# Covariate check: one linear model per survey construct. Every model uses the
# same predictors (group, gender, ethnicity, age, survey duration, and time);
# only the construct filter applied to long_data.check differs.
# NOTE(review): long_data.check contains both a 'pre' and a 'post' row for each
# subject, and lm() treats all rows as independent observations. Confirm that
# ignoring the within-subject pairing is acceptable for this check.
model_enjoyment_checking <- lm(score ~ group + gender + ethnicity + age + duration + time, data = filter(long_data.check, construct == "enjoyment"))
model_curiosity_checking <- lm(score ~ group + gender + ethnicity + age + duration + time, data = filter(long_data.check, construct == "curiosity"))
model_difficulty_checking <- lm(score ~ group + gender + ethnicity + age + duration + time, data = filter(long_data.check, construct == "difficulty"))
model_leveldiff_checking <- lm(score ~ group + gender + ethnicity + age + duration + time, data = filter(long_data.check, construct == "leveldifficulty"))
model_engagement_checking <- lm(score ~ group + gender + ethnicity + age + duration + time, data = filter(long_data.check, construct == "engagement"))
# Print coefficient tables and fit statistics, one model at a time
summary(model_enjoyment_checking)
##
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration +
## time, data = filter(long_data.check, construct == "enjoyment"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.7498 -2.1257 0.6324 3.0319 8.4208
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.684467 3.170713 11.570 < 2e-16 ***
## groupO 0.683961 0.797495 0.858 0.3920
## groupS 0.437927 0.786281 0.557 0.5781
## groupT 1.405317 0.802670 1.751 0.0813 .
## genderMale -0.077366 0.592863 -0.130 0.8963
## genderNon-binary 1.659509 1.129870 1.469 0.1433
## ethnicityAsian -4.676303 3.063532 -1.526 0.1283
## ethnicityBlack -5.018500 3.191429 -1.572 0.1172
## ethnicityHispanic -3.378641 3.202119 -1.055 0.2925
## ethnicityIndian -6.468739 3.463990 -1.867 0.0631 .
## ethnicityMixed -3.559272 3.199013 -1.113 0.2671
## ethnicityWhite -3.399512 3.042772 -1.117 0.2651
## age 0.016818 0.054652 0.308 0.7586
## duration 0.002245 0.001216 1.846 0.0662 .
## timepost -2.608333 0.539193 -4.837 2.44e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.177 on 225 degrees of freedom
## Multiple R-squared: 0.169, Adjusted R-squared: 0.1173
## F-statistic: 3.268 on 14 and 225 DF, p-value: 9.437e-05
summary(model_curiosity_checking)
##
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration +
## time, data = filter(long_data.check, construct == "curiosity"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.3413 -2.4245 -0.2752 2.7161 8.0920
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.7461419 2.7849245 14.272 <2e-16 ***
## groupO 1.3684916 0.7004620 1.954 0.0520 .
## groupS 0.6310987 0.6906126 0.914 0.3618
## groupT 0.8635862 0.7050068 1.225 0.2219
## genderMale 0.4541917 0.5207279 0.872 0.3840
## genderNon-binary 2.2675139 0.9923962 2.285 0.0233 *
## ethnicityAsian -3.3417110 2.6907852 -1.242 0.2156
## ethnicityBlack -3.2991379 2.8031206 -1.177 0.2405
## ethnicityHispanic -0.9723542 2.8125100 -0.346 0.7299
## ethnicityIndian -2.0755152 3.0425185 -0.682 0.4958
## ethnicityMixed -1.1436478 2.8097816 -0.407 0.6844
## ethnicityWhite -1.4315315 2.6725509 -0.536 0.5927
## age 0.0096059 0.0480025 0.200 0.8416
## duration 0.0008128 0.0010683 0.761 0.4475
## timepost -0.4333333 0.4735880 -0.915 0.3612
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.668 on 225 degrees of freedom
## Multiple R-squared: 0.1155, Adjusted R-squared: 0.06045
## F-statistic: 2.098 on 14 and 225 DF, p-value: 0.01284
summary(model_difficulty_checking)
##
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration +
## time, data = filter(long_data.check, construct == "difficulty"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.046 -1.941 0.325 2.458 7.351
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.872945 2.750094 7.954 8.77e-14 ***
## groupO -1.176183 0.691701 -1.700 0.090434 .
## groupS -3.253220 0.681975 -4.770 3.31e-06 ***
## groupT -2.515588 0.696189 -3.613 0.000373 ***
## genderMale -0.076618 0.514215 -0.149 0.881687
## genderNon-binary -0.448012 0.979985 -0.457 0.647996
## ethnicityAsian 0.209367 2.657132 0.079 0.937266
## ethnicityBlack -1.120589 2.768063 -0.405 0.685989
## ethnicityHispanic -1.299578 2.777334 -0.468 0.640293
## ethnicityIndian 1.269219 3.004466 0.422 0.673104
## ethnicityMixed -1.264954 2.774640 -0.456 0.648903
## ethnicityWhite -0.106742 2.639126 -0.040 0.967773
## age 0.048328 0.047402 1.020 0.309046
## duration -0.001612 0.001055 -1.528 0.127795
## timepost 4.625000 0.467665 9.890 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.623 on 225 degrees of freedom
## Multiple R-squared: 0.3763, Adjusted R-squared: 0.3375
## F-statistic: 9.696 on 14 and 225 DF, p-value: < 2.2e-16
summary(model_leveldiff_checking)
##
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration +
## time, data = filter(long_data.check, construct == "leveldifficulty"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.787 -4.913 0.373 5.601 28.699
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.880313 7.382693 2.151 0.0325 *
## groupO -0.985109 1.856889 -0.531 0.5963
## groupS -3.035784 1.830779 -1.658 0.0987 .
## groupT -2.584938 1.868937 -1.383 0.1680
## genderMale -0.150768 1.380423 -0.109 0.9131
## genderNon-binary -1.532549 2.630792 -0.583 0.5608
## ethnicityAsian 0.766072 7.133134 0.107 0.9146
## ethnicityBlack 6.668611 7.430929 0.897 0.3705
## ethnicityHispanic 0.941220 7.455820 0.126 0.8997
## ethnicityIndian 2.000664 8.065561 0.248 0.8043
## ethnicityMixed -3.841481 7.448587 -0.516 0.6065
## ethnicityWhite 0.499866 7.084796 0.071 0.9438
## age -0.056629 0.127252 -0.445 0.6567
## duration 0.004496 0.002832 1.587 0.1138
## timepost 15.100000 1.255458 12.027 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.725 on 225 degrees of freedom
## Multiple R-squared: 0.4205, Adjusted R-squared: 0.3844
## F-statistic: 11.66 on 14 and 225 DF, p-value: < 2.2e-16
summary(model_engagement_checking)
##
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration +
## time, data = filter(long_data.check, construct == "engagement"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.3854 -2.8642 0.2696 2.9595 10.2097
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.5338125 3.1464984 10.340 <2e-16 ***
## groupO 0.0176724 0.7914047 0.022 0.982
## groupS -0.1993399 0.7802765 -0.255 0.799
## groupT 0.5138193 0.7965397 0.645 0.520
## genderMale 0.2844414 0.5883354 0.483 0.629
## genderNon-binary 0.8214129 1.1212416 0.733 0.465
## ethnicityAsian -1.3631840 3.0401367 -0.448 0.654
## ethnicityBlack -1.5103447 3.1670569 -0.477 0.634
## ethnicityHispanic 0.2576921 3.1776653 0.081 0.935
## ethnicityIndian 0.0402606 3.4375364 0.012 0.991
## ethnicityMixed -0.4710770 3.1745827 -0.148 0.882
## ethnicityWhite -0.8671461 3.0195350 -0.287 0.774
## age 0.0303628 0.0542348 0.560 0.576
## duration 0.0006589 0.0012070 0.546 0.586
## timepost 0.3333333 0.5350752 0.623 0.534
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.145 on 225 degrees of freedom
## Multiple R-squared: 0.02529, Adjusted R-squared: -0.03536
## F-statistic: 0.417 on 14 and 225 DF, p-value: 0.9686
# Collinearity check: generalized variance inflation factors for the
# predictors, using a model fit on the full long table (all constructs pooled).
# NOTE(review): 'duration' appears in the per-construct models but not in this
# formula, so its collinearity with the other predictors is not assessed here.
# Confirm whether it should be added.
model <- lm(
  formula = score ~ group + gender + ethnicity + age + time,
  data = long_data.check
)
car::vif(model)
## GVIF Df GVIF^(1/(2*Df))
## group 1.249345 3 1.037800
## gender 1.193614 2 1.045240
## ethnicity 1.280323 6 1.020806
## age 1.045615 1 1.022553
## time 1.000000 1 1.000000
# Survey constructs in long_data.check
survey_vars <- c("enjoyment", "curiosity", "difficulty", "leveldifficulty", "engagement")

# Fit one covariate-check model per construct. set_names() names each list
# element after its construct so the name can be carried into the results.
# droplevels() removes factor levels that do not occur in the filtered rows.
models <- survey_vars %>%
  set_names() %>%
  map(function(construct_name) {
    construct_data <- long_data.check %>%
      filter(construct == construct_name) %>%
      droplevels()
    lm(score ~ group + gender + ethnicity + age + duration + time,
       data = construct_data)
  })

# Tidy each model and stack the coefficient tables; the list names become the
# 'Survey' column. map() + list_rbind() replaces the superseded map_df().
tidy_results <- models %>%
  map(tidy) %>%
  list_rbind(names_to = "Survey")

# Format results: round the numeric columns, keep the raw p-value in
# 'p.value.numeric' (it drives the row colouring below), and turn 'p.value'
# into a display string.
tidy_results <- tidy_results %>%
  mutate(
    estimate = round(estimate, 3),
    std.error = round(std.error, 3),
    statistic = round(statistic, 2),
    p.value.numeric = p.value,
    p.value = case_when(
      is.na(p.value.numeric) ~ NA_character_,
      p.value.numeric < 0.001 ~ "<.001",
      .default = as.character(round(p.value.numeric, 3))
    )
  )

# Interactive datatable with conditional formatting
datatable(
  tidy_results,
  filter = "top",
  options = list(
    pageLength = 25,
    autoWidth = TRUE
  ),
  rownames = FALSE
) %>%
  # Colour whole rows by the raw p-value using cut points .001, .01 and .05
  formatStyle(
    "p.value.numeric",
    target = "row",
    backgroundColor = styleInterval(
      c(0.001, 0.01, 0.05),
      c("#ffcccc", "#ffe0b3", "#ffffcc", "white") # red, orange, yellow, white
    )
  ) %>%
  # Bold the display string for the smallest p-values
  formatStyle(
    "p.value",
    fontWeight = styleEqual("<.001", "bold")
  )
To rule out the influence of demographic factors (gender, age, ethnicity) on key outcome measures, a series of linear models was fit using group, gender, ethnicity, age, survey duration, and time (pre/post) as predictors. Below are the main takeaways for each outcome. The reference group is the impasse (I) group:
Enjoyment: No demographic or group variables significantly predicted enjoyment, though there was a marginal trend suggesting higher enjoyment in the T group (p = .081); the difference for non-binary participants was in the same direction but not significant (p = .143). There was a significant decrease in enjoyment over time (p < .001).
Curiosity: The O group showed a small, marginally significant increase in curiosity compared to the reference group (p = .052), and non-binary participants reported significantly higher curiosity (p = .023). No other demographics were significant.
Difficulty: Participants in groups S and T reported significantly less difficulty than the reference group (p < .001). Time also had a strong effect, with post scores showing significantly higher difficulty (p < .001).
Perceived Level Difficulty: Similar to difficulty, S group participants reported marginally lower perceived level difficulty (p = .099), and post-test ratings were significantly higher (p < .001).
Engagement: No significant effects were found for group or demographic variables, and no significant change was observed from pre to post.
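Survey duration was not a significant predictor in any of the models (all p > .05), although it showed a marginal positive association with enjoyment (p = .066).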