1 Data prep

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrr)
library(broom)
library(dplyr)
library(skimr)
library(DT)
library(car)

## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

# Load the two source tables and give both the same join key, `participant`
file1 <- read.csv("gamex_data.csv") %>%
  rename(participant = What.is.your.participant.code.)
file2 <- read.csv("Adding-Variables.csv") %>%
  rename(participant = subject)

# Spot-check the first few participant codes in the first table
head(file1[["participant"]])

## [1] "PI1"  "PI10" "PI11" "PI12" "PI13" "PI14"

# Spot-check the first few participant codes in the second table
head(file2[["participant"]])

## [1] "PI1"  "PI10" "PI11" "PI12" "PI13" "PI14"

# Attach the extra per-participant variables in file2 to the rows of file1,
# matching on the `participant` column.
# `relationship = "one-to-one"` makes the join stop with an error if a
# participant code is duplicated in either table, instead of silently
# duplicating rows (which would inflate the sample used by later analyses).
merged_data <- left_join(
  file1,
  file2,
  by = "participant",
  relationship = "one-to-one"
)

# Optional: save the merged data to a new CSV
write.csv(merged_data, "merged_game_data.csv", row.names = FALSE)

# View the first rows of the merged table
head(merged_data)

##   participant Group Pre...EngagementScore Pre...DifficultyScore
## 1         PI1     I                    29                    25
## 2        PI10     I                    34                    24
## 3        PI11     I                    34                    24
## 4        PI12     I                    29                    26
## 5        PI13     I                    31                    25
## 6        PI14     I                    33                    25
##   Pre...CuriosityScore Pre...LevelDifficultyScore Pre...EnjoymentScore
## 1                   45                         16                   31
## 2                   35                         15                   33
## 3                   42                         15                   40
## 4                   34                         11                   25
## 5                   39                         15                   31
## 6                   40                         21                   35
##   Post...EngagementScore Post...DifficultyScore Post...CuriosityScore
## 1                     29                     23                    44
## 2                     24                     28                    34
## 3                     34                     25                    42
## 4                     37                     26                    32
## 5                     37                     26                    38
## 6                     35                     23                    38
##   Post...LevelDifficultyScore Post...EnjoymentScore D...EngagementScore
## 1                          46                    31                   0
## 2                           5                    22                 -10
## 3                          30                    39                   0
## 4                          33                    35                   8
## 5                          40                    36                   6
## 6                          37                    37                   2
##   D...DifficultyScore D...CuriosityScore D...LevelDifficultyScore D...Enjoyment
## 1                  -2                 -1                       30             0
## 2                   4                 -1                      -10           -11
## 3                   1                  0                       15            -1
## 4                   0                 -2                       22            10
## 5                   1                 -1                       25             5
## 6                  -2                 -2                       16             2
##   Game.Experience GameX.Split Duration..in.seconds.   Age
## 1            3120        High                   388 20.85
## 2            3328        High                   230 20.60
## 3            2080         Low                   386 18.60
## 4            4732        High                   520 20.43
## 5            5200        High                   355 19.27
## 6           29120        High                   579 19.85
##   Gender.Identity...Cleaned Ethnic.identity...Cleaned Language.1
## 1                    Female                  Hispanic    English
## 2                    Female                     White    English
## 3                    Female                     Asian    English
## 4                    Female                     Black    English
## 5                Non-binary                     White    English
## 6                Non-binary                     White    English

# Data cleaning in one pass: shorten the demographic/timing column names,
# drop rows whose gender is NA, coerce column types, then lower-case every
# column name and relabel the participant key as `subject`.
merged_data <- merged_data %>%
  rename(
    Gender = Gender.Identity...Cleaned,
    Ethnicity = Ethnic.identity...Cleaned,
    Duration = Duration..in.seconds.
  ) %>%
  filter(!is.na(Gender)) %>%
  mutate(
    across(c(Group, Gender, Ethnicity), as.factor),
    Age = as.numeric(Age)
  ) %>%
  rename_with(tolower) %>%
  rename(subject = participant)

# Reshape to long format: each pre.../post... score column becomes its own
# row. The regex splits the column name into `time` ("pre" or "post") and
# `construct` (the text between "..." and the trailing "score"); the cell
# value goes into `score`.
long_data.check <- pivot_longer(
  merged_data,
  cols = starts_with(c("pre", "post")),
  names_to = c("time", "construct"),
  names_pattern = "(pre|post)\\.\\.\\.(.*)score",
  values_to = "score"
)

# Make `time` a factor with "pre" as the first (reference) level
long_data.check$time <- factor(long_data.check$time, levels = c("pre", "post"))

## Descriptive statistics (skim) of the post scores and duration, per gender
skim_summary <- merged_data %>%
  group_by(gender) %>%
  skim(
    post...enjoymentscore,
    post...curiosityscore,
    post...difficultyscore,
    post...engagementscore,
    post...leveldifficultyscore,
    duration
  )

# Number of rows in each gender group (column `n`)
group_counts <- merged_data %>%
  group_by(gender) %>%
  tally()

# Attach the per-gender row count to every skim row
skim_summary_with_counts <- left_join(skim_summary, group_counts, by = "gender")

# View final result
skim_summary_with_counts

Data summary
Name	Piped data
Number of rows	120
Number of columns	24
_______________________
Column type frequency:
numeric	6
________________________
Group variables	gender

Variable type: numeric

skim_variable	gender	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist	n
post…enjoymentscore	Female	1	31.14	4.84	21	28.0	32.0	35.00	39	▃▃▅▇▅	49
post…enjoymentscore	Male	1	32.66	4.25	19	31.0	33.0	35.75	39	▁▂▃▇▅	62
post…enjoymentscore	Non-binary	1	36.22	2.17	32	35.0	36.0	37.00	39	▂▁▇▃▃	9
post…curiosityscore	Female	1	37.82	4.04	30	35.0	37.0	41.00	45	▅▇▆▃▅	49
post…curiosityscore	Male	1	39.76	3.29	34	37.0	40.0	42.00	45	▇▅▆▆▇	62
post…curiosityscore	Non-binary	1	41.44	2.46	38	40.0	42.0	43.00	45	▅▂▇▂▅	9
post…difficultyscore	Female	1	25.43	1.71	22	24.0	26.0	26.00	29	▂▃▇▃▂	49
post…difficultyscore	Male	1	24.34	2.48	17	23.0	25.0	26.00	29	▁▂▅▇▂	62
post…difficultyscore	Non-binary	1	24.56	1.74	22	23.0	24.0	26.00	27	▇▅▁▇▂	9
post…engagementscore	Female	1	33.10	4.57	19	30.0	34.0	36.00	43	▁▂▅▇▁	49
post…engagementscore	Male	1	33.19	4.18	22	31.0	33.0	36.75	41	▁▃▇▇▃	62
post…engagementscore	Non-binary	1	34.00	2.06	31	32.0	35.0	35.00	37	▇▂▁▇▅	9
post…leveldifficultyscore	Female	1	32.61	13.98	0	25.0	36.0	43.00	64	▃▃▇▇▂	49
post…leveldifficultyscore	Male	1	29.27	12.69	0	23.0	33.0	38.00	49	▂▂▇▇▅	62
post…leveldifficultyscore	Non-binary	1	27.33	7.62	16	23.0	26.0	31.00	40	▂▇▅▂▅	9
duration	Female	1	428.18	254.92	153	259.0	360.0	504.00	1415	▇▃▁▁▁	49
duration	Male	1	419.90	203.39	135	264.5	360.5	526.00	1123	▇▆▃▁▁	62
duration	Non-binary	1	522.22	264.53	199	355.0	550.0	619.00	1088	▇▂▆▁▂	9

# Single boxplot of `duration` over all rows (the x aesthetic is a constant)
merged_data %>%
  ggplot(aes(x = "", y = duration)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  ggtitle("Boxplot of Duration") +
  xlab("") +
  ylab("Duration (seconds)") +
  theme_minimal()

2 Models

# Fit one linear model per survey construct. Every model uses the same
# right-hand side (group, gender, ethnicity, age, duration, time) and only the
# rows of long_data.check whose `construct` matches.
# NOTE(review): long_data.check holds a "pre" and a "post" row for every row of
# merged_data, so if each merged_data row is one subject, the two observations
# per subject are not independent, which lm() assumes -- confirm whether a
# repeated-measures / mixed model is needed.

# Enjoyment
model_enjoyment_checking <- lm(score ~ group + gender + ethnicity + age + duration + time, data = filter(long_data.check, construct == "enjoyment"))


# Curiosity
model_curiosity_checking <- lm(score ~ group + gender + ethnicity + age + duration + time, data = filter(long_data.check, construct == "curiosity"))


# Difficulty
model_difficulty_checking <- lm(score ~ group + gender + ethnicity + age + duration + time, data = filter(long_data.check, construct == "difficulty"))


# Level difficulty
model_leveldiff_checking <- lm(score ~ group + gender + ethnicity + age + duration + time, data = filter(long_data.check, construct == "leveldifficulty"))


# Engagement
model_engagement_checking <- lm(score ~ group + gender + ethnicity + age + duration + time, data = filter(long_data.check, construct == "engagement"))

# Summarize the models: coefficient table and fit statistics, one model at a time
summary(model_enjoyment_checking)

## 
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration + 
##     time, data = filter(long_data.check, construct == "enjoyment"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.7498  -2.1257   0.6324   3.0319   8.4208 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       36.684467   3.170713  11.570  < 2e-16 ***
## groupO             0.683961   0.797495   0.858   0.3920    
## groupS             0.437927   0.786281   0.557   0.5781    
## groupT             1.405317   0.802670   1.751   0.0813 .  
## genderMale        -0.077366   0.592863  -0.130   0.8963    
## genderNon-binary   1.659509   1.129870   1.469   0.1433    
## ethnicityAsian    -4.676303   3.063532  -1.526   0.1283    
## ethnicityBlack    -5.018500   3.191429  -1.572   0.1172    
## ethnicityHispanic -3.378641   3.202119  -1.055   0.2925    
## ethnicityIndian   -6.468739   3.463990  -1.867   0.0631 .  
## ethnicityMixed    -3.559272   3.199013  -1.113   0.2671    
## ethnicityWhite    -3.399512   3.042772  -1.117   0.2651    
## age                0.016818   0.054652   0.308   0.7586    
## duration           0.002245   0.001216   1.846   0.0662 .  
## timepost          -2.608333   0.539193  -4.837 2.44e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.177 on 225 degrees of freedom
## Multiple R-squared:  0.169,  Adjusted R-squared:  0.1173 
## F-statistic: 3.268 on 14 and 225 DF,  p-value: 9.437e-05

# Coefficient table and fit statistics for the curiosity model
summary(model_curiosity_checking)

## 
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration + 
##     time, data = filter(long_data.check, construct == "curiosity"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.3413  -2.4245  -0.2752   2.7161   8.0920 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       39.7461419  2.7849245  14.272   <2e-16 ***
## groupO             1.3684916  0.7004620   1.954   0.0520 .  
## groupS             0.6310987  0.6906126   0.914   0.3618    
## groupT             0.8635862  0.7050068   1.225   0.2219    
## genderMale         0.4541917  0.5207279   0.872   0.3840    
## genderNon-binary   2.2675139  0.9923962   2.285   0.0233 *  
## ethnicityAsian    -3.3417110  2.6907852  -1.242   0.2156    
## ethnicityBlack    -3.2991379  2.8031206  -1.177   0.2405    
## ethnicityHispanic -0.9723542  2.8125100  -0.346   0.7299    
## ethnicityIndian   -2.0755152  3.0425185  -0.682   0.4958    
## ethnicityMixed    -1.1436478  2.8097816  -0.407   0.6844    
## ethnicityWhite    -1.4315315  2.6725509  -0.536   0.5927    
## age                0.0096059  0.0480025   0.200   0.8416    
## duration           0.0008128  0.0010683   0.761   0.4475    
## timepost          -0.4333333  0.4735880  -0.915   0.3612    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.668 on 225 degrees of freedom
## Multiple R-squared:  0.1155, Adjusted R-squared:  0.06045 
## F-statistic: 2.098 on 14 and 225 DF,  p-value: 0.01284

# Coefficient table and fit statistics for the difficulty model
summary(model_difficulty_checking)

## 
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration + 
##     time, data = filter(long_data.check, construct == "difficulty"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.046  -1.941   0.325   2.458   7.351 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       21.872945   2.750094   7.954 8.77e-14 ***
## groupO            -1.176183   0.691701  -1.700 0.090434 .  
## groupS            -3.253220   0.681975  -4.770 3.31e-06 ***
## groupT            -2.515588   0.696189  -3.613 0.000373 ***
## genderMale        -0.076618   0.514215  -0.149 0.881687    
## genderNon-binary  -0.448012   0.979985  -0.457 0.647996    
## ethnicityAsian     0.209367   2.657132   0.079 0.937266    
## ethnicityBlack    -1.120589   2.768063  -0.405 0.685989    
## ethnicityHispanic -1.299578   2.777334  -0.468 0.640293    
## ethnicityIndian    1.269219   3.004466   0.422 0.673104    
## ethnicityMixed    -1.264954   2.774640  -0.456 0.648903    
## ethnicityWhite    -0.106742   2.639126  -0.040 0.967773    
## age                0.048328   0.047402   1.020 0.309046    
## duration          -0.001612   0.001055  -1.528 0.127795    
## timepost           4.625000   0.467665   9.890  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.623 on 225 degrees of freedom
## Multiple R-squared:  0.3763, Adjusted R-squared:  0.3375 
## F-statistic: 9.696 on 14 and 225 DF,  p-value: < 2.2e-16

# Coefficient table and fit statistics for the level-difficulty model
summary(model_leveldiff_checking)

## 
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration + 
##     time, data = filter(long_data.check, construct == "leveldifficulty"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.787  -4.913   0.373   5.601  28.699 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       15.880313   7.382693   2.151   0.0325 *  
## groupO            -0.985109   1.856889  -0.531   0.5963    
## groupS            -3.035784   1.830779  -1.658   0.0987 .  
## groupT            -2.584938   1.868937  -1.383   0.1680    
## genderMale        -0.150768   1.380423  -0.109   0.9131    
## genderNon-binary  -1.532549   2.630792  -0.583   0.5608    
## ethnicityAsian     0.766072   7.133134   0.107   0.9146    
## ethnicityBlack     6.668611   7.430929   0.897   0.3705    
## ethnicityHispanic  0.941220   7.455820   0.126   0.8997    
## ethnicityIndian    2.000664   8.065561   0.248   0.8043    
## ethnicityMixed    -3.841481   7.448587  -0.516   0.6065    
## ethnicityWhite     0.499866   7.084796   0.071   0.9438    
## age               -0.056629   0.127252  -0.445   0.6567    
## duration           0.004496   0.002832   1.587   0.1138    
## timepost          15.100000   1.255458  12.027   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.725 on 225 degrees of freedom
## Multiple R-squared:  0.4205, Adjusted R-squared:  0.3844 
## F-statistic: 11.66 on 14 and 225 DF,  p-value: < 2.2e-16

# Coefficient table and fit statistics for the engagement model
summary(model_engagement_checking)

## 
## Call:
## lm(formula = score ~ group + gender + ethnicity + age + duration + 
##     time, data = filter(long_data.check, construct == "engagement"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.3854  -2.8642   0.2696   2.9595  10.2097 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       32.5338125  3.1464984  10.340   <2e-16 ***
## groupO             0.0176724  0.7914047   0.022    0.982    
## groupS            -0.1993399  0.7802765  -0.255    0.799    
## groupT             0.5138193  0.7965397   0.645    0.520    
## genderMale         0.2844414  0.5883354   0.483    0.629    
## genderNon-binary   0.8214129  1.1212416   0.733    0.465    
## ethnicityAsian    -1.3631840  3.0401367  -0.448    0.654    
## ethnicityBlack    -1.5103447  3.1670569  -0.477    0.634    
## ethnicityHispanic  0.2576921  3.1776653   0.081    0.935    
## ethnicityIndian    0.0402606  3.4375364   0.012    0.991    
## ethnicityMixed    -0.4710770  3.1745827  -0.148    0.882    
## ethnicityWhite    -0.8671461  3.0195350  -0.287    0.774    
## age                0.0303628  0.0542348   0.560    0.576    
## duration           0.0006589  0.0012070   0.546    0.586    
## timepost           0.3333333  0.5350752   0.623    0.534    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.145 on 225 degrees of freedom
## Multiple R-squared:  0.02529,    Adjusted R-squared:  -0.03536 
## F-statistic: 0.417 on 14 and 225 DF,  p-value: 0.9686

# Collinearity check: (generalised) variance inflation factors for the
# predictors. `duration` is included so the check covers the same right-hand
# side as the per-construct models. Fitting on all of long_data.check (every
# construct stacked) is fine here: VIFs are computed from the correlation
# structure of the coefficient estimates, which depends only on the predictor
# columns, not on the outcome.
model <- lm(
  score ~ group + gender + ethnicity + age + duration + time,
  data = long_data.check
)
vif(model)

##               GVIF Df GVIF^(1/(2*Df))
## group     1.249345  3        1.037800
## gender    1.193614  2        1.045240
## ethnicity 1.280323  6        1.020806
## age       1.045615  1        1.022553
## time      1.000000  1        1.000000

3 Results table

# Survey constructs in long_data.check
survey_vars <- c("enjoyment", "curiosity", "difficulty", "leveldifficulty", "engagement")

# One linear model per construct, kept in a list named by construct
models <- survey_vars %>%
  set_names() %>%
  map(function(construct_name) {
    lm(score ~ group + gender + ethnicity + age + duration + time,
       data = long_data.check %>%
         filter(construct == construct_name) %>%
         droplevels())
  })

# Stack the per-model coefficient tables (`Survey` records the model a row
# came from), then round the numeric columns and build a display p-value:
# "<.001" below 0.001, otherwise the value rounded to 3 decimals. The
# unformatted p-value is kept in `p.value.numeric`.
tidy_results <- models %>%
  map(tidy) %>%
  bind_rows(.id = "Survey") %>%
  mutate(
    across(c(estimate, std.error), ~ round(.x, 3)),
    statistic = round(statistic, 2),
    p.value.numeric = as.numeric(p.value),
    p.value = case_when(
      is.na(p.value.numeric) ~ NA_character_,
      p.value.numeric < 0.001 ~ "<.001",
      TRUE ~ as.character(round(p.value.numeric, 3))
    )
  )

# Interactive table of all coefficients. Whole rows are shaded according to
# the band the numeric p-value falls in, and the "<.001" label is set in bold.
tidy_results %>%
  datatable(
    rownames = FALSE,
    filter = "top",
    options = list(pageLength = 25, autoWidth = TRUE)
  ) %>%
  formatStyle(
    columns = "p.value.numeric",
    target = "row",
    backgroundColor = styleInterval(
      cuts = c(0.001, 0.01, 0.05),
      values = c("#ffcccc", "#ffe0b3", "#ffffcc", "white")  # red, orange, yellow, white
    )
  ) %>%
  formatStyle(
    columns = "p.value",
    fontWeight = styleEqual(levels = "<.001", values = "bold")
  )

4 Overall Summary

To rule out the influence of demographic factors (gender, age, ethnicity) on key outcome measures, a series of linear models was fit using group, gender, ethnicity, age, survey duration, and time (pre/post) as predictors. Below are the main takeaways for each outcome. The reference group is the impasse group:

Enjoyment: No demographic or group variables significantly predicted enjoyment, though there was a marginal trend suggesting higher enjoyment in the T group (p = .081); the estimate for non-binary participants was also positive but not significant (p = .143). There was a significant decrease in enjoyment over time (p < .001).

Curiosity: The O group showed marginally higher curiosity than the reference group (p = .052), and non-binary participants reported significantly higher curiosity (p = .023). No other demographics were significant.

Difficulty: Participants in groups S and T reported significantly less difficulty than the reference group (p < .001). Time also had a strong effect, with post scores showing significantly higher difficulty (p < .001).

Perceived Level Difficulty: Similar to difficulty, S group participants reported marginally lower perceived level difficulty (p = .099), and post-test ratings were significantly higher (p < .001).

Engagement: No significant effects were found for group or demographic variables, and no significant change was observed from pre to post.

Survey duration was not a significant predictor in any of the models (the closest was enjoyment, p = .066).

Checking-Differences

Zack Carpenter

2025-04-07

1 Data prep

2 Models

3 Results table

4 Overall Summary