Methodologies
Setting up Global Options
knitr::opts_chunk$set(
echo = TRUE,
message = FALSE,
warning = FALSE
)
Load Libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(readxl)
library(knitr)
library(psych)
Load Data
# Load data
df <- read_excel("C:/Users/ADMIN/Downloads/FakeData-for-GameUse.xlsx")
Objective 1
To quantify the relationship between early math skills (Pretest Math
Skills and MidYearMathSkills) and long-term success (MathFinalTest) by
computing growth metrics and testing their predictive value.
Data Cleaning
# Rename variables for clarity
df <- df %>%
rename(
pretest = `Pretest Math Skills`,
midyear = MidYearMathSkills,
final = MathFinalTest
)
# Handle missing values (replace with mean value)
df <- df %>%
mutate(
pretest = if_else(is.na(pretest), mean(pretest, na.rm = TRUE), pretest),
midyear = if_else(is.na(midyear), mean(midyear, na.rm = TRUE), midyear),
final = if_else(is.na(final), mean(final, na.rm = TRUE), final)
)
# Compute for the growth measures
df <- df %>%
mutate(
absolute_growth = midyear - pretest,
relative_growth = if_else(pretest == 0, NA_real_, absolute_growth / pretest)
)
Descriptive Statistics
#Create a table for Descriptive Statistics
desc_table <- df %>%
summarise(
# Pretest
n_pretest = sum(!is.na(pretest)),
mean_pretest = mean(pretest, na.rm=TRUE),
sd_pretest = sd(pretest, na.rm=TRUE),
median_pretest = median(pretest, na.rm=TRUE),
IQR_pretest = IQR(pretest, na.rm=TRUE),
# Midyear
n_midyear = sum(!is.na(midyear)),
mean_midyear = mean(midyear, na.rm=TRUE),
sd_midyear = sd(midyear, na.rm=TRUE),
median_midyear = median(midyear, na.rm=TRUE),
IQR_midyear = IQR(midyear, na.rm=TRUE),
# Final
n_final = sum(!is.na(final)),
mean_final = mean(final, na.rm=TRUE),
sd_final = sd(final, na.rm=TRUE),
median_final = median(final, na.rm=TRUE),
IQR_final = IQR(final, na.rm=TRUE),
# Absolute growth
n_absolute = sum(!is.na(absolute_growth)),
mean_absolute = mean(absolute_growth, na.rm=TRUE),
sd_absolute = sd(absolute_growth, na.rm=TRUE),
median_absolute = median(absolute_growth, na.rm=TRUE),
IQR_absolute = IQR(absolute_growth, na.rm=TRUE),
# Relative growth
n_relative = sum(!is.na(relative_growth)),
mean_relative = mean(relative_growth, na.rm=TRUE),
sd_relative = sd(relative_growth, na.rm=TRUE),
median_relative = median(relative_growth, na.rm=TRUE),
IQR_relative = IQR(relative_growth, na.rm=TRUE)
) %>%
pivot_longer(everything(),
names_to = c("stat", "Variable"),
names_sep = "_") %>%
pivot_wider(names_from = stat, values_from = value) %>%
mutate(
growth = case_when(
Variable %in% c("absolute", "relative") ~ as.character(mean),
TRUE ~ "N/A"
)
)
kable(desc_table, digits = 3,
caption = "Descriptive Statistics of Math Scores and Growth")
Descriptive Statistics of Math Scores and Growth
| pretest |
1000 |
50.451 |
21.412 |
49.680 |
30.165 |
N/A |
| midyear |
1000 |
49.225 |
22.315 |
48.650 |
31.430 |
N/A |
| final |
1000 |
76.074 |
14.530 |
79.570 |
16.099 |
N/A |
| absolute |
1000 |
-1.226 |
3.587 |
-1.000 |
6.000 |
-1.226 |
| relative |
1000 |
-0.040 |
0.142 |
-0.021 |
0.119 |
-0.0395170617503993 |
Interpretation
The data shows that students’ average scores slightly declined from
pretest (M = 50.45) to midyear (M = 49.23), before increasing
significantly by the final (M = 76.07). The negative absolute and
relative growth values suggest a slight overall decline between pretest
and midyear, though performance improved later.
Test of Improvement (Pretest to MidYear)
# Difference
df <- df %>% mutate(diff = midyear - pretest)
# Paired t-test
tt <- t.test(df$pretest, df$midyear, paired = TRUE)
# Effect size
mean_diff <- mean(df$diff, na.rm=TRUE)
sd_diff <- sd(df$diff, na.rm=TRUE)
cohens_d <- mean_diff / sd_diff
tt_table <- data.frame(
Mean_Diff = mean_diff,
SD_Diff = sd_diff,
t_test = tt$statistic,
df = tt$parameter,
p_value = tt$p.value,
Cohens_d = cohens_d
)
kable(tt_table, digits = 3, caption = "Paired t-test: Pretest vs MidYear")
Paired t-test: Pretest vs MidYear
| t |
-1.226 |
3.587 |
10.808 |
999 |
0 |
-0.342 |
Interpretation
There was a statistically significant decrease in scores from
pretest to midyear, t(999) = 10.81, p < .001. The average decrease
was -1.226 points, with a small effect size (Cohen’s d = -0.34),
suggesting the change is statistically meaningful but modest in
practical impact.
Regression Modeling: baseline & incremental test
Fit models to test predictive value:
Model A: final ~ pretest (baseline)
Model B: final ~ pretest + midyear (adds midyear)
Model C: final ~ pretest + absolute_growth (tests growth’s added
value)
Compare R² / Adj R² / AIC and use anova() to test whether adding
midyear significantly improves fit.
modA <- lm(final ~ pretest, data = df)
modB <- lm(final ~ pretest + midyear, data = df)
modC <- lm(final ~ pretest + absolute_growth, data = df)
# Summary tables
broom::tidy(modA) %>% kable(digits = 4)
| (Intercept) |
60.3325 |
1.0453 |
57.7151 |
0 |
| pretest |
0.3120 |
0.0191 |
16.3571 |
0 |
broom::tidy(modB) %>% kable(digits = 4)
| (Intercept) |
59.8946 |
1.0901 |
54.9423 |
0.0000 |
| pretest |
0.4794 |
0.1204 |
3.9811 |
0.0001 |
| midyear |
-0.1627 |
0.1156 |
-1.4079 |
0.1595 |
broom::tidy(modC) %>% kable(digits = 4)
| (Intercept) |
59.8946 |
1.0901 |
54.9423 |
0.0000 |
| pretest |
0.3167 |
0.0194 |
16.3614 |
0.0000 |
| absolute_growth |
-0.1627 |
0.1156 |
-1.4079 |
0.1595 |
# Model comparison
models_glance <- bind_rows(broom::glance(modA) %>% mutate(model="A_pretest"),
broom::glance(modB) %>% mutate(model="B_pre_mid"),
broom::glance(modC) %>% mutate(model="C_pre_growth"))
models_glance %>% select(model, r.squared, adj.r.squared, AIC) %>% kable(digits=4)
| A_pretest |
0.2114 |
0.2106 |
7957.729 |
| B_pre_mid |
0.2130 |
0.2114 |
7957.743 |
| C_pre_growth |
0.2130 |
0.2114 |
7957.743 |
# ANOVA test whether adding midyear improves over pretest-only
anova(modA, modB)
Interpretation
Model A shows that pretest scores significantly predict final scores
(p < .001), explaining about 21.1% of the variance (R² = 0.2114).
Adding midyear scores (Model B) or absolute growth (Model C) slightly
increases R² to 0.2130, but the additional predictors are not
statistically significant (p = 0.16), and the ANOVA test confirms that
Model B does not significantly improve fit over Model A (F(1, 997) =
1.98, p = 0.159). Pretest alone is the most efficient predictor among
the three models tested.
Plot the Results
# Scatterplot Pretest vs Final
ggplot(df, aes(x = pretest, y = final)) +
geom_point(alpha=0.4) +
geom_smooth(method="lm", se=TRUE, color="blue") +
labs(title="Relationship between Pretest and Final Scores",
x="Pretest Score", y="Final Score")
The scatterplot shows a clear positive relationship between pretest and
final math scores, meaning students who started stronger tended to
perform better at the end. However, the spread indicates that pretest
alone does not fully determine final outcomes.
# Scatterplot Midyear vs Final
ggplot(df, aes(x = midyear, y = final)) +
geom_point(alpha=0.4, color="darkgreen") +
geom_smooth(method="lm", se=TRUE, color="black") +
labs(title="Relationship between Midyear and Final Scores",
x="Midyear Score", y="Final Score")
Midyear scores are also positively associated with final scores, though
the pattern is similar to pretest. This suggests that midyear
performance reflects continued learning but doesn’t add much predictive
power beyond the pretest.
# Scatterplot Absolute Growth vs Final
ggplot(df, aes(x = absolute_growth, y = final)) +
geom_point(alpha=0.4, color="red") +
geom_smooth(method="lm", se=TRUE, color="black") +
labs(title="Absolute Growth vs Final Scores",
x="Absolute Growth (Final - Pretest)", y="Final Score")
The plot shows no strong linear relationship between absolute growth and
final scores. Students with similar growth levels still achieved very
different final results, indicating that growth by itself is not a
reliable predictor of long-term success.
Discussion
The analysis confirmed that students’ early math skills,
particularly their pretest scores, are strong predictors of final
performance. Although there was a small drop from pretest to midyear,
the final scores improved substantially. This suggests that early
identification of students’ abilities is critical, as interventions or
additional support can be targeted before the midyear mark. The fact
that midyear scores or growth metrics added little predictive value
implies that initial competency captures most of the variance in
outcomes, emphasizing the importance of early assessment and baseline
evaluation in educational settings.
Objective 2
To evaluate how early logged/self-reported emotions (Bored%,
confused%, delight, Frust%, Engaged%) predict long-term success and
whether emotion volatility (EmotionSD) provides incremental predictive
value.
Descriptive Statistics
# Select emotion predictors + outcome
emo_df <- df %>%
select(`Bored%`, `confused%`, delight, `Frust%`, `Engaged%`, final)
# Summary table
emo_desc <- emo_df %>%
summarise(across(everything(),
list(mean = ~mean(., na.rm=TRUE),
sd = ~sd(., na.rm=TRUE),
min = ~min(., na.rm=TRUE),
max = ~max(., na.rm=TRUE)))) %>%
pivot_longer(cols = everything(),
names_to = c("Variable", "Statistic"),
names_sep = "_") %>%
pivot_wider(names_from = Statistic, values_from = value) %>%
mutate(Variable = recode(Variable,
"Bored%" = "Bored",
"confused%" = "Confused",
"delight" = "Delighted",
"Frust%" = "Frustrated",
"Engaged%" = "Engaged"))
kable(emo_desc, digits = 3, caption = "Descriptive Statistics of Emotions and Final Test ")
Descriptive Statistics of Emotions and Final Test
| Bored |
10.860 |
4.288 |
4.000 |
18.000 |
| Confused |
10.079 |
6.060 |
0.000 |
20.000 |
| Delighted |
2.397 |
1.718 |
0.000 |
5.000 |
| Frustrated |
23.874 |
7.934 |
4.000 |
45.000 |
| Engaged |
52.790 |
10.869 |
22.000 |
84.000 |
| final |
76.074 |
14.530 |
26.635 |
99.622 |
Interpretation
The correlation results show that Frustrated (r = .35) has a
moderate positive association with Final scores, suggesting that
students who reported more frustration tended to perform better. In
contrast, Engaged (r = –.28) is negatively correlated with Final scores,
possibly reflecting that higher engagement time does not necessarily
translate into higher achievement. Bored, Confused, and Delighted show
negligible correlations with Final scores, indicating they are not
strong predictors of long-term success.
Correlation
# Rename for clarity
emo_df <- emo_df %>%
dplyr::rename(
Bored = `Bored%`,
Confused = `confused%`,
Delighted = `delight`,
Frustrated = `Frust%`,
Engaged = `Engaged%`,
Final = `final`
)
# Model A: emotions only
modE1 <- lm(Final ~ Bored + Confused + Delighted + Frustrated + Engaged, data = emo_df)
# Compute for EmotionalSD across emotions per row
emo_df <- emo_df %>%
rowwise() %>%
mutate(EmotionSD = sd(c_across(c(Bored, Confused, Delighted, Frustrated, Engaged)), na.rm = TRUE)) %>%
ungroup()
# Model B: emotions + volatility
modE2 <- lm(Final ~ Bored + Confused + Delighted + Frustrated + Engaged + EmotionSD, data = emo_df)
# Summaries
broom::tidy(modE1) %>% kable(digits = 3, caption = "Model A: Emotions Predicting Final Math Score")
Model A: Emotions Predicting Final Math Score
| (Intercept) |
58.492 |
2.018 |
28.988 |
0.000 |
| Bored |
0.128 |
0.101 |
1.272 |
0.204 |
| Confused |
0.002 |
0.071 |
0.022 |
0.983 |
| Delighted |
0.362 |
0.251 |
1.443 |
0.149 |
| Frustrated |
0.641 |
0.054 |
11.811 |
0.000 |
| Engaged |
NA |
NA |
NA |
NA |
broom::tidy(modE2) %>% kable(digits = 3, caption = "Model B: Emotions + Volatility Predicting Final Math Score")
Model B: Emotions + Volatility Predicting Final Math
Score
| (Intercept) |
79.120 |
22.500 |
3.516 |
0.000 |
| Bored |
-0.124 |
0.292 |
-0.425 |
0.671 |
| Confused |
-0.253 |
0.285 |
-0.886 |
0.376 |
| Delighted |
0.062 |
0.411 |
0.150 |
0.880 |
| Frustrated |
0.470 |
0.194 |
2.418 |
0.016 |
| Engaged |
NA |
NA |
NA |
NA |
| EmotionSD |
-0.505 |
0.549 |
-0.921 |
0.358 |
# Compare models
compare_models <- bind_rows(
broom::glance(modE1) %>% mutate(Model = "A_Emotions"),
broom::glance(modE2) %>% mutate(Model = "B_Emotions+Volatility")
)
kable(compare_models %>% select(Model, r.squared, adj.r.squared, AIC), digits = 3, caption = "Model Comparison")
Model Comparison
| A_Emotions |
0.125 |
0.122 |
8067.169 |
| B_Emotions+Volatility |
0.126 |
0.122 |
8068.317 |
# ANOVA test between models
anova(modE1, modE2)
Interpretation
Model A shows that emotions like Bored, Confused, and Delighted do
not significantly predict the final math score, while Frustrated has a
strong positive effect (p < 0.001). In Model B, adding volatility
(EmotionSD) doesn’t improve prediction significantly, as no variable,
including Bored, Confused, or Delighted, shows a meaningful relationship
with the final score. A comparison of models reveals very similar
R-squared values (0.125 vs. 0.126), suggesting that adding volatility
doesn’t offer a substantial improvement over emotions alone, confirmed
by an ANOVA test (p = 0.358).
Plots
Emotions vs. Final Score
#Scatterplots of Emotions vs Final Score
emo_vars <- c("Bored", "Confused", "Delighted", "Frustrated", "Engaged")
for (var in emo_vars) {
print(
ggplot(emo_df, aes_string(x = var, y = "Final")) +
geom_point(alpha = 0.6, color = "steelblue") +
geom_smooth(method = "lm", se = FALSE, color = "darkred") +
labs(title = paste(var, "vs Final Math Score"),
x = var, y = "Final Math Score") +
theme_minimal()
)
}




The scatterplots show different emotional states in relation to final
math scores. Boredom, confusion, and delight appear to have almost no
effect, as the regression lines are nearly flat. However, frustration
shows a positive relationship with higher scores, while engagement
surprisingly trends negatively, suggesting that students who reported
higher frustration tended to perform better, whereas those more engaged
scored slightly lower.
Discussion
Students’ emotional states during learning had mixed effects.
Frustration positively correlated with final scores, indicating that
experiencing some level of challenge may drive effort and persistence,
leading to better outcomes. On the other hand, boredom, confusion,
delight, and engagement did not strongly predict performance, which
suggests that mere presence of positive or neutral emotions is not
enough to drive achievement. The lack of impact from emotional
volatility further indicates that consistent emotional patterns alone do
not explain success. Educators might use this insight to recognize that
manageable levels of frustration can be constructive, while simply
boosting engagement or positive emotions may not guarantee higher
achievement.
Model Comparison (R² values)
#Bar chart for model comparison
compare_models %>%
select(Model, r.squared, adj.r.squared) %>%
pivot_longer(cols = c(r.squared, adj.r.squared),
names_to = "Metric", values_to = "Value") %>%
ggplot(aes(x = Model, y = Value, fill = Metric)) +
geom_col(position = "dodge") +
labs(title = "Model Comparison: Emotions vs Emotions + Volatility",
x = "Model", y = "Value") +
theme_minimal()

Interpretation
The comparison shows that adding volatility to emotions (Model B)
produces almost no improvement over using emotions alone (Model A), as
both models have nearly identical R² and adjusted R² values. This
suggests that volatility contributes little additional explanatory power
in predicting final math scores beyond emotions.
Objective 3
To assess whether speed-game performance (Games/week, % Correct,
Problems/Min, SpeedGame SD) and the derived FluencyIndex predict final
outcomes beyond pretest/midyear scores.
Data Cleaning
#Rename for Clarity
speed_df <- df %>%
dplyr::rename(
GamesPerWeek = `Speed Games/week Avg N`,
PercentCorrect = `SpeedGames %Corr`,
ProblemsPerMin = `Speedgame Prob/Min`,
SpeedSD = `SpeedGame SD` ) %>%
# Convert to numeric
mutate(
GamesPerWeek = as.numeric(GamesPerWeek),
PercentCorrect = as.numeric(PercentCorrect),
ProblemsPerMin = as.numeric(ProblemsPerMin),
SpeedSD = as.numeric(SpeedSD),
FluencyIndex = (PercentCorrect * ProblemsPerMin) / (SpeedSD + 1)
)
Descriptive Statistics
speed_desc <- speed_df %>%
select(GamesPerWeek, PercentCorrect, ProblemsPerMin, SpeedSD, FluencyIndex, final) %>%
psych::describe() %>%
as.data.frame()
kable(speed_desc[,c("mean","sd","min","max")], digits=3,
caption="Descriptive Statistics: Speed-game Performance and Final Scores")
Descriptive Statistics: Speed-game Performance and Final
Scores
| GamesPerWeek |
3.469 |
2.335 |
0.000 |
7.000 |
| PercentCorrect |
42.050 |
22.490 |
10.000 |
96.000 |
| ProblemsPerMin |
5.535 |
5.438 |
0.000 |
148.000 |
| SpeedSD |
2.121 |
1.038 |
1.000 |
4.000 |
| FluencyIndex |
95.156 |
211.653 |
0.000 |
5624.000 |
| final |
76.074 |
14.530 |
26.635 |
99.622 |
Interpretation
The descriptive statistics show substantial variability in
performance metrics, such as speed, accuracy, and fluency, with
participants exhibiting diverse strategies. On average, players solve
around 5.5 problems per minute and correctly answer 42% of questions,
but there’s a broad range in both speed and accuracy. Final scores tend
to be moderately high, though with significant variation across
participants.
Correlation with Final Score
#Use Pearson's Correlation
speed_cor <- psych::corr.test(speed_df %>%
select(GamesPerWeek, PercentCorrect, ProblemsPerMin, SpeedSD, FluencyIndex, final),
method="pearson", use="pairwise")
kable(round(speed_cor$p,3), caption="P-values: Speed-game Performance and Final Score")
P-values: Speed-game Performance and Final Score
| GamesPerWeek |
0.000 |
1.000 |
1.000 |
1.000 |
1 |
1.000 |
| PercentCorrect |
0.868 |
0.000 |
0.001 |
1.000 |
0 |
0.000 |
| ProblemsPerMin |
0.969 |
0.000 |
0.000 |
0.000 |
0 |
0.009 |
| SpeedSD |
0.276 |
0.738 |
0.000 |
0.000 |
0 |
1.000 |
| FluencyIndex |
0.787 |
0.000 |
0.000 |
0.000 |
0 |
0.000 |
| final |
0.709 |
0.000 |
0.001 |
0.213 |
0 |
0.000 |
Interpretation
The analysis shows that Percent Correct, Problems per Minute, and
the Fluency Index significantly predict final math scores (p < .05),
while Games per Week and Speed SD do not. This suggests that accuracy,
speed, and combined fluency in practice are more important for
performance than the frequency of playing or consistency.
Regression Models
# Baseline: pretest + midyear
modS0 <- lm(final ~ pretest + midyear, data = speed_df)
# Add speed-game predictors
modS1 <- lm(final ~ pretest + midyear + GamesPerWeek + PercentCorrect + ProblemsPerMin + SpeedSD, data = speed_df)
# Add derived fluency index
modS2 <- lm(final ~ pretest + midyear + FluencyIndex, data = speed_df)
# Compare models
broom::tidy(modS0) %>% kable(digits=3, caption="Model S0: Pretest + MidYear only")
Model S0: Pretest + MidYear only
| (Intercept) |
59.895 |
1.090 |
54.942 |
0.000 |
| pretest |
0.479 |
0.120 |
3.981 |
0.000 |
| midyear |
-0.163 |
0.116 |
-1.408 |
0.159 |
broom::tidy(modS1) %>% kable(digits=3, caption="Model S1: Add Speed-game metrics")
Model S1: Add Speed-game metrics
| (Intercept) |
59.139 |
1.761 |
33.586 |
0.000 |
| pretest |
0.295 |
0.129 |
2.287 |
0.022 |
| midyear |
-0.120 |
0.121 |
-0.992 |
0.321 |
| GamesPerWeek |
-0.027 |
0.211 |
-0.128 |
0.898 |
| PercentCorrect |
0.210 |
0.025 |
8.290 |
0.000 |
| ProblemsPerMin |
0.071 |
0.080 |
0.888 |
0.375 |
| SpeedSD |
-0.492 |
0.416 |
-1.183 |
0.237 |
broom::tidy(modS2) %>% kable(digits=3, caption="Model S2: Add FluencyIndex")
Model S2: Add FluencyIndex
| (Intercept) |
59.824 |
1.161 |
51.523 |
0.000 |
| pretest |
0.483 |
0.131 |
3.693 |
0.000 |
| midyear |
-0.173 |
0.125 |
-1.381 |
0.167 |
| FluencyIndex |
0.006 |
0.002 |
2.789 |
0.005 |
#Model Comparison
compare_speed <- bind_rows(
broom::glance(modS0) %>% mutate(Model="S0_Pre+Mid"),
broom::glance(modS1) %>% mutate(Model="S1_Pre+Mid+Speed"),
broom::glance(modS2) %>% mutate(Model="S2_Pre+Mid+Fluency")
)
kable(compare_speed %>% select(Model, r.squared, adj.r.squared, AIC), digits=3,
caption="Model Comparison: Speed-game Performance")
Model Comparison: Speed-game Performance
| S0_Pre+Mid |
0.213 |
0.211 |
7957.743 |
| S1_Pre+Mid+Speed |
0.284 |
0.279 |
6789.182 |
| S2_Pre+Mid+Fluency |
0.230 |
0.227 |
6846.078 |
Interpretation
Model S0 shows that pretest scores significantly predict final
scores (p = 0.000), but midyear scores do not (p = 0.159). In Model S1,
adding speed-game metrics improves the model fit (R² increases from
0.213 to 0.284), with PercentCorrect emerging as a strong predictor (p =
0.000), while other game metrics are not significant. Model S2 adds
FluencyIndex, which is a significant positive predictor (p = 0.005),
slightly improving model fit over S0 but less than S1, suggesting
accuracy in speed games (PercentCorrect) is the strongest predictor
among the game metrics.
Plots
Model Comparison
compare_speed %>%
select(Model, r.squared, adj.r.squared) %>%
pivot_longer(cols = c(r.squared, adj.r.squared),
names_to="Metric", values_to="Value") %>%
ggplot(aes(x=Model, y=Value, fill=Metric)) +
geom_col(position="dodge") +
labs(title="Model Comparison: Speed-game Performance",
x="Model", y="Value") +
theme_minimal()
The bar chart shows that adding Speed (S1 model) to Pre- and Mid-test
scores produces the highest explanatory power (R² and Adjusted R²),
indicating that speed significantly improves prediction of performance.
In contrast, adding Fluency (S2 model) provides only a slight
improvement over the baseline (S0 model with just Pre and Mid). This
suggests that speed-related performance is a stronger predictor of final
math outcomes than fluency in this dataset.
Discussion
Analysis of speed-game performance revealed that accuracy and
fluency were stronger predictors of final math scores than simply the
number of games played or variability in speed. This suggests that
quality of practice—how correctly and efficiently students solve
problems—is more important than quantity. The Fluency Index, which
combines accuracy and speed, showed predictive value but less than
accuracy alone, indicating that students benefit from both skill mastery
and efficient problem-solving. For educators and game-based learning
designers, this highlights the need to focus on structured,
skill-building practice rather than just encouraging high
participation.
Objective 4.
To determine whether self-reported Interest moderates the
relationship between fluency and long-term success (test FluencyIndex x
Interest interaction), and whether Interest independently predicts
outcomes.
Data Cleaning
#Make sure variables are numeric
analysis_df <- speed_df %>%
mutate(
Interest = as.numeric(Interest),
FluencyIndex = as.numeric(FluencyIndex),
Final = as.numeric(final))
Regression Models
# Model A - Interest only
modI1 <- lm(Final ~ Interest, data=analysis_df)
# Model B - Fluency only
modI2 <- lm(Final ~ FluencyIndex, data = analysis_df)
# Model C - Fluency + Interest
modI3 <- lm(Final ~ FluencyIndex + Interest, data = analysis_df)
# Model D - Interaction (Fluency × Interest)
modI4 <- lm(Final ~ FluencyIndex * Interest, data = analysis_df)
# Combine tidy outputs with model labels
kable(broom::tidy(modI1) %>% mutate(across(where(is.numeric), round, 3)),
caption = "Model I1: Interest only")
Model I1: Interest only
| (Intercept) |
62.944 |
1.189 |
52.961 |
0 |
| Interest |
0.648 |
0.055 |
11.852 |
0 |
kable(broom::tidy(modI2) %>% mutate(across(where(is.numeric), round, 3)),
caption = "Model I2: Fluency only")
Model I2: Fluency only
| (Intercept) |
74.758 |
0.535 |
139.655 |
0 |
| FluencyIndex |
0.014 |
0.002 |
6.048 |
0 |
kable(broom::tidy(modI3) %>% mutate(across(where(is.numeric), round, 3)),
caption = "Model I3: Fluency + Interest")
Model I3: Fluency + Interest
| (Intercept) |
63.013 |
1.267 |
49.749 |
0 |
| FluencyIndex |
0.009 |
0.002 |
4.110 |
0 |
| Interest |
0.609 |
0.060 |
10.116 |
0 |
kable(broom::tidy(modI4) %>% mutate(across(where(is.numeric), round, 3)),
caption = "Model I4: Fluency × Interest")
Model I4: Fluency × Interest
| (Intercept) |
55.526 |
1.524 |
36.428 |
0 |
| FluencyIndex |
0.166 |
0.019 |
8.624 |
0 |
| Interest |
0.823 |
0.064 |
12.943 |
0 |
| FluencyIndex:Interest |
-0.005 |
0.001 |
-8.198 |
0 |
# create a model comparison table with glance()
compare_interest <- bind_rows(
broom::glance(modI1) %>% mutate(Model = "I1_Interest"),
broom::glance(modI2) %>% mutate(Model = "I2_Fluency"),
broom::glance(modI3) %>% mutate(Model = "I3_Fluency+Interest"),
broom::glance(modI4) %>% mutate(Model = "I4_Fluency×Interest")
)
kable(compare_interest %>%
select(Model, r.squared, adj.r.squared, AIC) %>%
mutate(across(where(is.numeric), round, 3)),
caption = "Model Comparison: Interest and Fluency Models")
Model Comparison: Interest and Fluency Models
| I1_Interest |
0.123 |
0.123 |
8063.561 |
| I2_Fluency |
0.041 |
0.040 |
7031.349 |
| I3_Fluency+Interest |
0.143 |
0.141 |
6936.340 |
| I4_Fluency×Interest |
0.205 |
0.203 |
6873.331 |
Interpretation
The results show that both Interest and FluencyIndex significantly
predict final scores individually, with Interest having a stronger
effect. When combined in Model I3, both predictors remain significant,
improving model fit. Model I4, which includes their interaction,
explains the most variance (R² = 0.205), revealing that the relationship
between FluencyIndex and performance varies depending on levels of
Interest.
Plots
analysis_df <- analysis_df %>%
mutate(
FluencyIndex = as.numeric(FluencyIndex),
Final = as.numeric(Final),
Interest = as.numeric(Interest)
)
# Create categorical Interest levels for visualization
analysis_df <- analysis_df %>%
mutate(InterestLevel = cut(Interest,
breaks = quantile(Interest, probs = c(0,0.33,0.66,1), na.rm = TRUE),
labels = c("Low","Medium","High"),
include.lowest = TRUE))
# Interaction plot
ggplot(analysis_df, aes(x = FluencyIndex, y = Final)) +
geom_point(alpha=0.5) +
geom_smooth(method="lm", se=FALSE, color="red") +
facet_wrap(~InterestLevel) +
labs(title="Fluency vs Final by Interest Level",
x="Fluency Index",
y="Final Math Score") +
theme_minimal()
The results show that interest moderates the relationship between
fluency and math performance. Students with higher interest levels gain
more from increased fluency, while those with low interest show weaker
improvements. This highlights the importance of both cognitive skills
and motivation in achieving better math outcomes.
Discussion
Interest in math emerged as a key factor, both independently and as
a moderator of fluency. Students with higher interest levels gained more
from increased fluency, demonstrating that motivation amplifies the
effect of skill development on outcomes. This aligns with educational
theories that emphasize the role of intrinsic motivation in learning.
For low-interest students, even high fluency did not translate as
strongly into better scores, underscoring the importance of fostering
student engagement and curiosity alongside skill-building. Interventions
that increase interest could therefore improve the impact of learning
activities.
Conclusion
The four objectives of the study were accomplished. For objective 1,
growth metrics were evaluated, showing that pretest scores strongly
predict final outcomes, while midyear and growth measures add little
explanatory power. Objective 2 was met by examining emotion-related
predictors, with results indicating that frustration positively relates
to performance, whereas volatility does not. Objective 3 was achieved by
analyzing speed-game metrics, where accuracy (percent correct) emerged
as the strongest predictor of success. Objective 4 was fulfilled by
testing fluency as moderated by interest, confirming that students with
higher interest benefit more from fluency in achieving better math
performance.