1 Data prep

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrr)
library(broom)
library(dplyr)
library(skimr)
library(DT)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
# Load the survey scores and the file of added participant-level variables,
# giving the participant identifier the same name ('participant') in both so
# they can be joined on it.
file1 <- read.csv("gamex_data.csv") %>%
  rename(participant = What.is.your.participant.code.)

file2 <- read.csv("Adding-Variables.csv") %>%
  rename(participant = subject)

# Peek at the first few participant codes in each file to confirm the
# 'participant' column exists and the codes look alike before joining
head(file1[["participant"]])
## [1] "PI1"  "PI10" "PI11" "PI12" "PI13" "PI14"
head(file2[["participant"]])
## [1] "PI1"  "PI10" "PI11" "PI12" "PI13" "PI14"
# Merge the data on the 'participant' column.
# Every row of file1 is kept; a participant with no match in file2 gets NA in
# the columns that come from file2.
# relationship = "one-to-one" makes the join stop with an error if a
# participant code appears more than once in either file, instead of silently
# duplicating rows and inflating the sample used by everything downstream.
merged_data <- left_join(
  file1,
  file2,
  by = "participant",
  relationship = "one-to-one"
)

# Optional: Save the merged data to a new CSV
write.csv(merged_data, "merged_game_data.csv", row.names = FALSE)

# View a quick summary
head(merged_data)
##   participant Group Pre...EngagementScore Pre...DifficultyScore
## 1         PI1     I                    29                    25
## 2        PI10     I                    34                    24
## 3        PI11     I                    34                    24
## 4        PI12     I                    29                    26
## 5        PI13     I                    31                    25
## 6        PI14     I                    33                    25
##   Pre...CuriosityScore Pre...LevelDifficultyScore Pre...EnjoymentScore
## 1                   45                         16                   31
## 2                   35                         15                   33
## 3                   42                         15                   40
## 4                   34                         11                   25
## 5                   39                         15                   31
## 6                   40                         21                   35
##   Post...EngagementScore Post...DifficultyScore Post...CuriosityScore
## 1                     29                     23                    44
## 2                     24                     28                    34
## 3                     34                     25                    42
## 4                     37                     26                    32
## 5                     37                     26                    38
## 6                     35                     23                    38
##   Post...LevelDifficultyScore Post...EnjoymentScore D...EngagementScore
## 1                          46                    31                   0
## 2                           5                    22                 -10
## 3                          30                    39                   0
## 4                          33                    35                   8
## 5                          40                    36                   6
## 6                          37                    37                   2
##   D...DifficultyScore D...CuriosityScore D...LevelDifficultyScore D...Enjoyment
## 1                  -2                 -1                       30             0
## 2                   4                 -1                      -10           -11
## 3                   1                  0                       15            -1
## 4                   0                 -2                       22            10
## 5                   1                 -1                       25             5
## 6                  -2                 -2                       16             2
##   Game.Experience GameX.Split Duration..in.seconds.   Age
## 1            3120        High                   388 20.85
## 2            3328        High                   230 20.60
## 3            2080         Low                   386 18.60
## 4            4732        High                   520 20.43
## 5            5200        High                   355 19.27
## 6           29120        High                   579 19.85
##   Gender.Identity...Cleaned Ethnic.identity...Cleaned Language.1
## 1                    Female                  Hispanic    English
## 2                    Female                     White    English
## 3                    Female                     Asian    English
## 4                    Female                     Black    English
## 5                Non-binary                     White    English
## 6                Non-binary                     White    English
# Data cleaning, done as a single pipeline:
#   1. give the demographic and duration columns short names
#   2. drop rows whose gender is missing
#   3. store group, gender and ethnicity as factors and age as numeric
#   4. lower-case every column name
#   5. call the participant identifier 'subject'
merged_data <- merged_data %>%
  rename(
    Gender = Gender.Identity...Cleaned,
    Ethnicity = Ethnic.identity...Cleaned,
    Duration = Duration..in.seconds.
  ) %>%
  filter(!is.na(Gender)) %>%
  mutate(
    across(c(Group, Gender, Ethnicity), as.factor),
    Age = as.numeric(Age)
  ) %>%
  rename_with(tolower) %>%
  rename(subject = participant)

# Reshape to long format: every column whose name starts with 'pre' or 'post'
# becomes rows, with its name split into 'time' (pre/post) and 'construct'
# (the text between the '...' separator and the trailing 'score') and its
# value stored in 'score'. 'time' is a factor ordered pre, then post.
long_data.check <- pivot_longer(
  merged_data,
  cols = starts_with(c("pre", "post")),
  names_pattern = "(pre|post)\\.\\.\\.(.*)score",
  names_to = c("time", "construct"),
  names_transform = list(
    time = function(x) factor(x, levels = c("pre", "post"))
  ),
  values_to = "score"
)

# Descriptive statistics for the five post-test scores and the survey
# duration, computed separately for each gender
skim_summary <- merged_data %>%
  group_by(gender) %>%
  skim(
    post...enjoymentscore,
    post...curiosityscore,
    post...difficultyscore,
    post...engagementscore,
    post...leveldifficultyscore,
    duration
  )

# Number of rows per gender
group_counts <- merged_data %>%
  group_by(gender) %>%
  tally()

# Attach the per-gender n to each row of the skim summary
skim_summary_with_counts <- left_join(skim_summary, group_counts, by = "gender")

# View final result
skim_summary_with_counts
Data summary
Name Piped data
Number of rows 120
Number of columns 24
_______________________
Column type frequency:
numeric 6
________________________
Group variables gender

Variable type: numeric

skim_variable gender n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist n
post…enjoymentscore Female 0 1 31.14 4.84 21 28.0 32.0 35.00 39 ▃▃▅▇▅ 49
post…enjoymentscore Male 0 1 32.66 4.25 19 31.0 33.0 35.75 39 ▁▂▃▇▅ 62
post…enjoymentscore Non-binary 0 1 36.22 2.17 32 35.0 36.0 37.00 39 ▂▁▇▃▃ 9
post…curiosityscore Female 0 1 37.82 4.04 30 35.0 37.0 41.00 45 ▅▇▆▃▅ 49
post…curiosityscore Male 0 1 39.76 3.29 34 37.0 40.0 42.00 45 ▇▅▆▆▇ 62
post…curiosityscore Non-binary 0 1 41.44 2.46 38 40.0 42.0 43.00 45 ▅▂▇▂▅ 9
post…difficultyscore Female 0 1 25.43 1.71 22 24.0 26.0 26.00 29 ▂▃▇▃▂ 49
post…difficultyscore Male 0 1 24.34 2.48 17 23.0 25.0 26.00 29 ▁▂▅▇▂ 62
post…difficultyscore Non-binary 0 1 24.56 1.74 22 23.0 24.0 26.00 27 ▇▅▁▇▂ 9
post…engagementscore Female 0 1 33.10 4.57 19 30.0 34.0 36.00 43 ▁▂▅▇▁ 49
post…engagementscore Male 0 1 33.19 4.18 22 31.0 33.0 36.75 41 ▁▃▇▇▃ 62
post…engagementscore Non-binary 0 1 34.00 2.06 31 32.0 35.0 35.00 37 ▇▂▁▇▅ 9
post…leveldifficultyscore Female 0 1 32.61 13.98 0 25.0 36.0 43.00 64 ▃▃▇▇▂ 49
post…leveldifficultyscore Male 0 1 29.27 12.69 0 23.0 33.0 38.00 49 ▂▂▇▇▅ 62
post…leveldifficultyscore Non-binary 0 1 27.33 7.62 16 23.0 26.0 31.00 40 ▂▇▅▂▅ 9
duration Female 0 1 428.18 254.92 153 259.0 360.0 504.00 1415 ▇▃▁▁▁ 49
duration Male 0 1 419.90 203.39 135 264.5 360.5 526.00 1123 ▇▆▃▁▁ 62
duration Non-binary 0 1 522.22 264.53 199 355.0 550.0 619.00 1088 ▇▂▆▁▂ 9
# Single boxplot of survey duration over all rows of merged_data
# (x is an empty string, so there is one box and no grouping)
merged_data %>%
  ggplot(aes(x = "", y = duration)) +
  geom_boxplot(colour = "black", fill = "skyblue") +
  labs(
    title = "Boxplot of Duration",
    x = "",
    y = "Duration (seconds)"
  ) +
  theme_minimal()

2 Models

# Per-construct check models: one lm() fit per survey construct, each with the
# same predictors (group, gender, ethnicity, age, duration, time) and fit to
# the rows of long_data.check that belong to that construct.
# NOTE(review): long_data.check holds one 'pre' and one 'post' row per subject
# for each construct, so rows within a model are not independent; confirm that
# a plain lm() is acceptable for this check.
model_enjoyment_checking <- lm(
  score ~ group + gender + ethnicity + age + duration + time,
  data = filter(long_data.check, construct == "enjoyment")
)

model_curiosity_checking <- lm(
  score ~ group + gender + ethnicity + age + duration + time,
  data = filter(long_data.check, construct == "curiosity")
)

model_difficulty_checking <- lm(
  score ~ group + gender + ethnicity + age + duration + time,
  data = filter(long_data.check, construct == "difficulty")
)

model_leveldiff_checking <- lm(
  score ~ group + gender + ethnicity + age + duration + time,
  data = filter(long_data.check, construct == "leveldifficulty")
)

model_engagement_checking <- lm(
  score ~ group + gender + ethnicity + age + duration + time,
  data = filter(long_data.check, construct == "engagement")
)

# Print the coefficient table and fit statistics for each model,
# starting with enjoyment
summary(model_enjoyment_checking)
## 
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration + 
##     time, data = filter(long_data.check, construct == "enjoyment"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.7498  -2.1257   0.6324   3.0319   8.4208 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       36.684467   3.170713  11.570  < 2e-16 ***
## groupO             0.683961   0.797495   0.858   0.3920    
## groupS             0.437927   0.786281   0.557   0.5781    
## groupT             1.405317   0.802670   1.751   0.0813 .  
## genderMale        -0.077366   0.592863  -0.130   0.8963    
## genderNon-binary   1.659509   1.129870   1.469   0.1433    
## ethnicityAsian    -4.676303   3.063532  -1.526   0.1283    
## ethnicityBlack    -5.018500   3.191429  -1.572   0.1172    
## ethnicityHispanic -3.378641   3.202119  -1.055   0.2925    
## ethnicityIndian   -6.468739   3.463990  -1.867   0.0631 .  
## ethnicityMixed    -3.559272   3.199013  -1.113   0.2671    
## ethnicityWhite    -3.399512   3.042772  -1.117   0.2651    
## age                0.016818   0.054652   0.308   0.7586    
## duration           0.002245   0.001216   1.846   0.0662 .  
## timepost          -2.608333   0.539193  -4.837 2.44e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.177 on 225 degrees of freedom
## Multiple R-squared:  0.169,  Adjusted R-squared:  0.1173 
## F-statistic: 3.268 on 14 and 225 DF,  p-value: 9.437e-05
# Curiosity model: residual summary, coefficient table and fit statistics
summary(model_curiosity_checking)
## 
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration + 
##     time, data = filter(long_data.check, construct == "curiosity"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.3413  -2.4245  -0.2752   2.7161   8.0920 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       39.7461419  2.7849245  14.272   <2e-16 ***
## groupO             1.3684916  0.7004620   1.954   0.0520 .  
## groupS             0.6310987  0.6906126   0.914   0.3618    
## groupT             0.8635862  0.7050068   1.225   0.2219    
## genderMale         0.4541917  0.5207279   0.872   0.3840    
## genderNon-binary   2.2675139  0.9923962   2.285   0.0233 *  
## ethnicityAsian    -3.3417110  2.6907852  -1.242   0.2156    
## ethnicityBlack    -3.2991379  2.8031206  -1.177   0.2405    
## ethnicityHispanic -0.9723542  2.8125100  -0.346   0.7299    
## ethnicityIndian   -2.0755152  3.0425185  -0.682   0.4958    
## ethnicityMixed    -1.1436478  2.8097816  -0.407   0.6844    
## ethnicityWhite    -1.4315315  2.6725509  -0.536   0.5927    
## age                0.0096059  0.0480025   0.200   0.8416    
## duration           0.0008128  0.0010683   0.761   0.4475    
## timepost          -0.4333333  0.4735880  -0.915   0.3612    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.668 on 225 degrees of freedom
## Multiple R-squared:  0.1155, Adjusted R-squared:  0.06045 
## F-statistic: 2.098 on 14 and 225 DF,  p-value: 0.01284
# Difficulty model: residual summary, coefficient table and fit statistics
summary(model_difficulty_checking)
## 
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration + 
##     time, data = filter(long_data.check, construct == "difficulty"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.046  -1.941   0.325   2.458   7.351 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       21.872945   2.750094   7.954 8.77e-14 ***
## groupO            -1.176183   0.691701  -1.700 0.090434 .  
## groupS            -3.253220   0.681975  -4.770 3.31e-06 ***
## groupT            -2.515588   0.696189  -3.613 0.000373 ***
## genderMale        -0.076618   0.514215  -0.149 0.881687    
## genderNon-binary  -0.448012   0.979985  -0.457 0.647996    
## ethnicityAsian     0.209367   2.657132   0.079 0.937266    
## ethnicityBlack    -1.120589   2.768063  -0.405 0.685989    
## ethnicityHispanic -1.299578   2.777334  -0.468 0.640293    
## ethnicityIndian    1.269219   3.004466   0.422 0.673104    
## ethnicityMixed    -1.264954   2.774640  -0.456 0.648903    
## ethnicityWhite    -0.106742   2.639126  -0.040 0.967773    
## age                0.048328   0.047402   1.020 0.309046    
## duration          -0.001612   0.001055  -1.528 0.127795    
## timepost           4.625000   0.467665   9.890  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.623 on 225 degrees of freedom
## Multiple R-squared:  0.3763, Adjusted R-squared:  0.3375 
## F-statistic: 9.696 on 14 and 225 DF,  p-value: < 2.2e-16
# Level-difficulty model: residual summary, coefficient table and fit statistics
summary(model_leveldiff_checking)
## 
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration + 
##     time, data = filter(long_data.check, construct == "leveldifficulty"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.787  -4.913   0.373   5.601  28.699 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       15.880313   7.382693   2.151   0.0325 *  
## groupO            -0.985109   1.856889  -0.531   0.5963    
## groupS            -3.035784   1.830779  -1.658   0.0987 .  
## groupT            -2.584938   1.868937  -1.383   0.1680    
## genderMale        -0.150768   1.380423  -0.109   0.9131    
## genderNon-binary  -1.532549   2.630792  -0.583   0.5608    
## ethnicityAsian     0.766072   7.133134   0.107   0.9146    
## ethnicityBlack     6.668611   7.430929   0.897   0.3705    
## ethnicityHispanic  0.941220   7.455820   0.126   0.8997    
## ethnicityIndian    2.000664   8.065561   0.248   0.8043    
## ethnicityMixed    -3.841481   7.448587  -0.516   0.6065    
## ethnicityWhite     0.499866   7.084796   0.071   0.9438    
## age               -0.056629   0.127252  -0.445   0.6567    
## duration           0.004496   0.002832   1.587   0.1138    
## timepost          15.100000   1.255458  12.027   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.725 on 225 degrees of freedom
## Multiple R-squared:  0.4205, Adjusted R-squared:  0.3844 
## F-statistic: 11.66 on 14 and 225 DF,  p-value: < 2.2e-16
# Engagement model: residual summary, coefficient table and fit statistics
summary(model_engagement_checking)
## 
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration + 
##     time, data = filter(long_data.check, construct == "engagement"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.3854  -2.8642   0.2696   2.9595  10.2097 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       32.5338125  3.1464984  10.340   <2e-16 ***
## groupO             0.0176724  0.7914047   0.022    0.982    
## groupS            -0.1993399  0.7802765  -0.255    0.799    
## groupT             0.5138193  0.7965397   0.645    0.520    
## genderMale         0.2844414  0.5883354   0.483    0.629    
## genderNon-binary   0.8214129  1.1212416   0.733    0.465    
## ethnicityAsian    -1.3631840  3.0401367  -0.448    0.654    
## ethnicityBlack    -1.5103447  3.1670569  -0.477    0.634    
## ethnicityHispanic  0.2576921  3.1776653   0.081    0.935    
## ethnicityIndian    0.0402606  3.4375364   0.012    0.991    
## ethnicityMixed    -0.4710770  3.1745827  -0.148    0.882    
## ethnicityWhite    -0.8671461  3.0195350  -0.287    0.774    
## age                0.0303628  0.0542348   0.560    0.576    
## duration           0.0006589  0.0012070   0.546    0.586    
## timepost           0.3333333  0.5350752   0.623    0.534    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.145 on 225 degrees of freedom
## Multiple R-squared:  0.02529,    Adjusted R-squared:  -0.03536 
## F-statistic: 0.417 on 14 and 225 DF,  p-value: 0.9686
# Collinearity check: (generalised) variance inflation factors from car::vif()
# for a model fit to every row of long_data.check, i.e. all constructs pooled.
# NOTE(review): 'duration' is not in this formula although the per-construct
# models include it; confirm whether it should be part of this check.
model <- lm(
  score ~ group + gender + ethnicity + age + time,
  data = long_data.check
)
car::vif(model)
##               GVIF Df GVIF^(1/(2*Df))
## group     1.249345  3        1.037800
## gender    1.193614  2        1.045240
## ethnicity 1.280323  6        1.020806
## age       1.045615  1        1.022553
## time      1.000000  1        1.000000

3 Results table

# The five survey constructs present in long_data.check
survey_vars <- c(
  "enjoyment", "curiosity", "difficulty", "leveldifficulty", "engagement"
)

# Fit the same linear model once per construct. Naming the vector first makes
# map() return a list whose element names are the construct names.
# droplevels() removes factor levels that do not occur in the construct's rows.
models <- survey_vars %>%
  set_names() %>%
  map(function(construct_name) {
    lm(score ~ group + gender + ethnicity + age + duration + time,
       data = long_data.check %>%
         filter(construct == construct_name) %>%
         droplevels())
  })

# Tidy each model's coefficient table and stack them into one data frame;
# the 'Survey' column records which construct each row came from
tidy_results <- models %>%
  map(tidy) %>%
  bind_rows(.id = "Survey")

# Format results for display.
#   - estimate and std.error are rounded to 3 decimals, statistic to 2.
#   - p.value.numeric keeps the p-value as a number (the results table uses it
#     to shade rows).
#   - p.value becomes a display string: "<.001" when p is below .001,
#     otherwise the value with exactly three decimals. sprintf() is used
#     rather than as.character(round()) so trailing zeros are kept
#     (e.g. "0.050" instead of "0.05") and every entry shows the same
#     precision.
tidy_results <- tidy_results %>%
  mutate(
    estimate = round(estimate, 3),
    std.error = round(std.error, 3),
    statistic = round(statistic, 2),
    p.value.numeric = as.numeric(p.value),
    p.value = case_when(
      is.na(p.value.numeric) ~ NA_character_,
      p.value.numeric < 0.001 ~ "<.001",
      .default = sprintf("%.3f", p.value.numeric)
    )
  )

# Interactive results table: 25 rows per page, per-column filters on top,
# no row names. Whole rows are shaded by p.value.numeric using breakpoints at
# .001, .01 and .05 (red, orange, yellow, then white), and "<.001" entries in
# the p.value column are shown in bold.
tidy_results %>%
  datatable(
    filter = "top",
    options = list(pageLength = 25, autoWidth = TRUE),
    rownames = FALSE
  ) %>%
  formatStyle(
    "p.value.numeric",
    target = "row",
    backgroundColor = styleInterval(
      cuts = c(0.001, 0.01, 0.05),
      values = c("#ffcccc", "#ffe0b3", "#ffffcc", "white")
    )
  ) %>%
  formatStyle("p.value", fontWeight = styleEqual("<.001", "bold"))

4 Overall Summary

To rule out the influence of demographic factors (gender, age, ethnicity) on key outcome measures, a series of linear models was fit using group, gender, ethnicity, age, survey duration, and time (pre/post) as predictors. Below are the main takeaways for each outcome. The reference group is the impasse group (coded I):

Enjoyment: No demographic or group variables significantly predicted enjoyment, though there was a marginal trend suggesting higher enjoyment in the T group (p = .081); the effect for non-binary participants was not significant (p = .143). There was a significant decrease in enjoyment over time (p < .001).

Curiosity: The O group showed a marginal trend toward higher curiosity than the reference group (p = .052), and non-binary participants reported significantly higher curiosity (p = .023). No other demographics were significant.

Difficulty: Participants in groups S and T reported significantly less difficulty than the reference group (p < .001). Time also had a strong effect, with post scores showing significantly higher difficulty (p < .001).

Perceived Level Difficulty: Similar to difficulty, S group participants reported marginally lower perceived level difficulty (p = .099), and post-test ratings were significantly higher (p < .001).

Engagement: No significant effects were found for group or demographic variables, and no significant change was observed from pre to post.

Survey duration was not a significant predictor in any of the models (the smallest p-value was .066, for enjoyment).