library(kirkegaard)
library(arrow)
library(lme4)
library(glue)
theme_set(theme_bw())
options(digits = 3)
if (F) {
library(btw)
btw::btw_mcp_session()
}
target_top <- read_csv("data/steamdb_top_rated.csv") %>%
mutate(app_id = as.integer(app_id), source = "top_rated")
## [1mRows: [22m[34m500[39m [1mColumns: [22m[34m5[39m
## [36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
## [1mDelimiter:[22m ","
## [31mchr[39m (1): name
## [32mdbl[39m (4): rank, app_id, rating_pct, total_reviews
##
## [36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
## [36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
target_sampled <- read_csv("data/sampled_games.csv") %>%
mutate(app_id = as.integer(app_id), source = "sampled")
## [1mRows: [22m[34m500[39m [1mColumns: [22m[34m5[39m
## [36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
## [1mDelimiter:[22m ","
## [31mchr[39m (1): name
## [32mdbl[39m (4): app_id, total_reviews, positive, rating_pct
##
## [36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
## [36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
target_games <- bind_rows(
target_top %>% select(app_id, name, rating_pct, total_reviews, source),
target_sampled %>% select(app_id, name, rating_pct, total_reviews, source)
)
# read scraped reviews from parquet
votes <- read_parquet("data/steam_review_votes.parquet") %>%
as_tibble() %>%
mutate(
app_id = as.integer(app_id),
voted_up = as.logical(voted_up)
)
votes
n_total <- nrow(votes)
n_unique <- n_distinct(votes$recommendationid)
cat("Total rows:", n_total, "\n")
## Total rows: 16023234
cat("Unique recommendation IDs:", n_unique, "\n")
## Unique recommendation IDs: 16023234
cat("Duplicates:", n_total - n_unique, "\n")
## Duplicates: 0
game_summary <- votes %>%
group_by(app_id) %>%
summarize(
n_reviews = n(),
mean_rating = mean(voted_up),
.groups = "drop"
) %>%
left_join(target_games %>% select(app_id, name, rating_pct, source), by = "app_id") %>%
arrange(desc(n_reviews))
game_summary
# distribution of review counts by source
target_games %>%
ggplot(aes(total_reviews, fill = source)) +
geom_histogram(alpha = 0.7, position = "identity", bins = 40) +
scale_x_log10(labels = scales::comma) +
labs(
x = "Total reviews (log scale)",
y = "Count",
title = "Distribution of review counts",
subtitle = glue("{nrow(target_games)} games (500 top-rated + 500 sampled)")
)
# cross-sectional: rating vs review count by source
game_summary %>%
left_join(target_games %>% select(app_id, total_reviews), by = "app_id") %>%
ggplot(aes(total_reviews, mean_rating, color = source)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
scale_x_log10(labels = scales::comma) +
scale_y_continuous(labels = scales::percent_format()) +
labs(
x = "Total reviews (log scale)",
y = "Mean positive rating",
title = "Rating vs review count across games",
subtitle = glue("{nrow(game_summary)} games")
)
## [37m`geom_smooth()` using formula = 'y ~ x'[39m
Do games get rated more or less positively over their review history?
# add within-game review order and centile
votes_ordered <- votes %>%
left_join(target_games %>% select(app_id, source), by = "app_id") %>%
group_by(app_id) %>%
arrange(timestamp_created, .by_group = TRUE) %>%
mutate(
review_order = row_number(),
review_centile = review_order / n()
) %>%
ungroup()
# bin every 100 reviews for efficiency
votes_binned <- votes_ordered %>%
group_by(app_id) %>%
mutate(bin = (review_order - 1) %/% 100 + 1) %>%
group_by(app_id, bin) %>%
summarize(
n = n(),
mean_rating = mean(voted_up),
mean_centile = mean(review_centile),
mean_order = mean(review_order),
source = first(source),
.groups = "drop"
)
votes_binned
Does rating change as a function of how many reviews a game has accumulated?
fit_abs <- lmer(mean_rating ~ scale(mean_order) * source + (1 | app_id), data = votes_binned, weights = n)
summary(fit_abs)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mean_rating ~ scale(mean_order) * source + (1 | app_id)
## Data: votes_binned
## Weights: n
##
## REML criterion at convergence: -530062
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -18.037 -0.315 0.068 0.394 10.997
##
## Random effects:
## Groups Name Variance Std.Dev.
## app_id (Intercept) 0.0087 0.0933
## Residual 0.2073 0.4553
## Number of obs: 160657, groups: app_id, 1000
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.817040 0.004202 194.43
## scale(mean_order) 0.003051 0.000440 6.94
## sourcetop_rated 0.153551 0.005927 25.91
## scale(mean_order):sourcetop_rated -0.007691 0.000472 -16.29
##
## Correlation of Fixed Effects:
## (Intr) scl(_) srctp_
## scl(mn_rdr) 0.081
## sourctp_rtd -0.709 -0.057
## scl(mn_r):_ -0.075 -0.931 0.057
Does rating change as a function of where in the game’s review history we are?
fit_rel <- lmer(mean_rating ~ mean_centile * source + (1 | app_id), data = votes_binned, weights = n)
summary(fit_rel)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mean_rating ~ mean_centile * source + (1 | app_id)
## Data: votes_binned
## Weights: n
##
## REML criterion at convergence: -530149
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -18.143 -0.312 0.068 0.392 11.092
##
## Random effects:
## Groups Name Variance Std.Dev.
## app_id (Intercept) 0.00872 0.0934
## Residual 0.20719 0.4552
## Number of obs: 160657, groups: app_id, 1000
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.818542 0.004211 194.40
## mean_centile -0.007717 0.000766 -10.07
## sourcetop_rated 0.160055 0.005941 26.94
## mean_centile:sourcetop_rated -0.004901 0.000894 -5.49
##
## Correlation of Fixed Effects:
## (Intr) mn_cnt srctp_
## mean_centil -0.091
## sourctp_rtd -0.709 0.065
## mn_cntl:sr_ 0.078 -0.858 -0.075
# average rating trajectory by source
votes_ordered %>%
mutate(centile_bin = round(review_centile, 2)) %>%
group_by(centile_bin, source) %>%
summarize(
mean_rating = mean(voted_up),
n = n(),
.groups = "drop"
) %>%
ggplot(aes(centile_bin, mean_rating, color = source)) +
geom_line() +
scale_y_continuous(labels = scales::percent_format()) +
labs(
x = "Review centile (0 = earliest, 1 = latest)",
y = "Mean positive rating",
title = "Average rating trajectory by source"
)
# limit to games with 5+ years of review history, plot first 5 years
game_age <- votes_ordered %>%
group_by(app_id) %>%
summarize(age_days = (max(timestamp_created) - min(timestamp_created)) / 86400, .groups = "drop")
games_5yr <- game_age %>% filter(age_days >= 5 * 365) %>% pull(app_id)
votes_ordered %>%
filter(app_id %in% games_5yr) %>%
group_by(app_id) %>%
mutate(days_since_first = (timestamp_created - min(timestamp_created)) / 86400) %>%
ungroup() %>%
filter(days_since_first <= 5 * 365) %>%
mutate(age_bin = floor(days_since_first / 30) * 30) %>%
group_by(age_bin, source) %>%
summarize(
mean_rating = mean(voted_up),
n = n(),
.groups = "drop"
) %>%
ggplot(aes(age_bin / 365, mean_rating, color = source)) +
geom_line() +
scale_y_continuous(labels = scales::percent_format()) +
labs(
x = "Years since first review",
y = "Mean positive rating",
title = "Rating trajectory by game age",
subtitle = glue("{length(games_5yr)} games with 5+ years of review history")
)
# limit to games with 10k+ reviews, plot first 10k
games_10k <- game_summary %>% filter(n_reviews >= 10000) %>% pull(app_id)
votes_ordered %>%
filter(app_id %in% games_10k, review_order <= 10000) %>%
mutate(count_bin = (review_order - 1) %/% 200 + 1) %>%
group_by(count_bin, source) %>%
summarize(
mean_rating = mean(voted_up),
n_games = n_distinct(app_id),
mean_order = mean(review_order),
.groups = "drop"
) %>%
ggplot(aes(mean_order, mean_rating, color = source)) +
geom_line() +
scale_x_continuous(labels = scales::comma) +
scale_y_continuous(labels = scales::percent_format()) +
labs(
x = "Review count",
y = "Mean positive rating",
title = "Rating trajectory by review count",
subtitle = glue("{length(games_10k)} games with 10k+ reviews")
)
# individual game trajectories (top reviewed games)
top_games <- game_summary %>% slice_max(n_reviews, n = 20)
votes_ordered %>%
filter(app_id %in% top_games$app_id) %>%
mutate(centile_bin = round(review_centile * 20) / 20) %>%
group_by(app_id, centile_bin) %>%
summarize(mean_rating = mean(voted_up), .groups = "drop") %>%
left_join(target_games %>% select(app_id, name), by = "app_id") %>%
ggplot(aes(centile_bin, mean_rating, color = name)) +
geom_line(alpha = 0.7) +
scale_y_continuous(labels = scales::percent_format()) +
labs(
x = "Review centile",
y = "Mean positive rating",
title = "Rating trajectories for top reviewed games"
) +
theme(legend.position = "right")
Game fixed effects with clustered standard errors — more conservative than lmer random effects.
Source main effect is absorbed by game FE. We split models by source to test for differential effects cleanly.
library(fixest)
# Age predictor for binned data
votes_binned_age <- votes_ordered %>%
group_by(app_id) %>%
mutate(days_since_first = (timestamp_created - min(timestamp_created)) / 86400) %>%
ungroup() %>%
group_by(app_id) %>%
mutate(bin = (review_order - 1) %/% 100 + 1) %>%
group_by(app_id, bin) %>%
summarize(
n = n(),
mean_rating = mean(voted_up),
mean_age_years = mean(days_since_first) / 365,
source = first(source),
.groups = "drop"
)
# Pooled (all games)
fe_rel <- feols(mean_rating ~ mean_centile | app_id, data = votes_binned, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
fe_abs <- feols(mean_rating ~ scale(mean_order) | app_id, data = votes_binned, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
fe_age <- feols(mean_rating ~ mean_age_years | app_id, data = votes_binned_age, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
summary(fe_rel)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id)
## Estimate Std. Error t value Pr(>|t|)
## mean_centile -0.0113 0.00286 -3.96 7.9223e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045441 Adj. R2: 0.810252
## Within R2: 0.005147
summary(fe_abs)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id)
## Estimate Std. Error t value Pr(>|t|)
## scale(mean_order) -0.00361 0.00104 -3.47 0.00054022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045486 Adj. R2: 0.809873
## Within R2: 0.003157
summary(fe_age)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id)
## Estimate Std. Error t value Pr(>|t|)
## mean_age_years -0.00165 0.000533 -3.09 0.0020894 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045477 Adj. R2: 0.809952
## Within R2: 0.003576
# Split by source: top rated
bv_top <- votes_binned %>% filter(source == "top_rated")
ba_top <- votes_binned_age %>% filter(source == "top_rated")
fe_rel_top <- feols(mean_rating ~ mean_centile | app_id, data = bv_top, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
fe_abs_top <- feols(mean_rating ~ scale(mean_order) | app_id, data = bv_top, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
fe_age_top <- feols(mean_rating ~ mean_age_years | app_id, data = ba_top, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
summary(fe_rel_top)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 118,070
## Weights: n
## Fixed-effects: app_id: 497
## Standard-errors: Clustered (app_id)
## Estimate Std. Error t value Pr(>|t|)
## mean_centile -0.0126 0.00135 -9.32 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.024652 Adj. R2: 0.231137
## Within R2: 0.021364
summary(fe_abs_top)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 118,070
## Weights: n
## Fixed-effects: app_id: 497
## Standard-errors: Clustered (app_id)
## Estimate Std. Error t value Pr(>|t|)
## scale(mean_order) -0.00494 0.000444 -11.1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.024663 Adj. R2: 0.230426
## Within R2: 0.020459
summary(fe_age_top)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 118,070
## Weights: n
## Fixed-effects: app_id: 497
## Standard-errors: Clustered (app_id)
## Estimate Std. Error t value Pr(>|t|)
## mean_age_years -0.00223 0.000348 -6.4 3.5217e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.024692 Adj. R2: 0.228609
## Within R2: 0.018146
# Split by source: sampled
bv_sam <- votes_binned %>% filter(source == "sampled")
ba_sam <- votes_binned_age %>% filter(source == "sampled")
fe_rel_sam <- feols(mean_rating ~ mean_centile | app_id, data = bv_sam, weights = ~n, vcov = "cluster")
fe_abs_sam <- feols(mean_rating ~ scale(mean_order) | app_id, data = bv_sam, weights = ~n, vcov = "cluster")
fe_age_sam <- feols(mean_rating ~ mean_age_years | app_id, data = ba_sam, weights = ~n, vcov = "cluster")
summary(fe_rel_sam)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 42,584
## Weights: n
## Fixed-effects: app_id: 500
## Standard-errors: Clustered (app_id)
## Estimate Std. Error t value Pr(>|t|)
## mean_centile -0.00772 0.0101 -0.764 0.4451
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.078235 Adj. R2: 0.741815
## Within R2: 8.097e-4
summary(fe_abs_sam)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 42,584
## Weights: n
## Fixed-effects: app_id: 500
## Standard-errors: Clustered (app_id)
## Estimate Std. Error t value Pr(>|t|)
## scale(mean_order) 0.00216 0.00501 0.431 0.66677
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.078252 Adj. R2: 0.741705
## Within R2: 3.805e-4
summary(fe_age_sam)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 42,584
## Weights: n
## Fixed-effects: app_id: 500
## Standard-errors: Clustered (app_id)
## Estimate Std. Error t value Pr(>|t|)
## mean_age_years -0.000737 0.00121 -0.61 0.54247
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.078253 Adj. R2: 0.741699
## Within R2: 3.579e-4
library(modelsummary)
models <- list(
"Top: Centile" = fe_rel_top,
"Top: Count" = fe_abs_top,
"Top: Age" = fe_age_top,
"Sampled: Centile" = fe_rel_sam,
"Sampled: Count" = fe_abs_sam,
"Sampled: Age" = fe_age_sam
)
modelsummary(
models,
stars = c('*' = .05, '**' = .01, '***' = .001),
gof_map = c("nobs", "r2.within", "FE: app_id"),
title = "Fixed effects models of within-game rating trajectories"
)
| Top: Centile | Top: Count | Top: Age | Sampled: Centile | Sampled: Count | Sampled: Age | |
|---|---|---|---|---|---|---|
| * p < 0.05, ** p < 0.01, *** p < 0.001 | ||||||
| mean_centile | -0.013*** | -0.008 | ||||
| (0.001) | (0.010) | |||||
| scale(mean_order) | -0.005*** | 0.002 | ||||
| (0.000) | (0.005) | |||||
| mean_age_years | -0.002*** | -0.001 | ||||
| (0.000) | (0.001) | |||||
| Num.Obs. | 118070 | 118070 | 118070 | 42584 | 42584 | 42584 |
| R2 Within | 0.021 | 0.020 | 0.018 | 0.001 | 0.000 | 0.000 |
| FE: app_id | X | X | X | X | X | X |
modelsummary(
models,
stars = c('*' = .05, '**' = .01, '***' = .001),
gof_map = c("nobs", "r2.within", "FE: app_id"),
output = "figs/fe_models_table.png",
title = "Fixed effects models of within-game rating trajectories"
)
Why is the top-rated slope significant but the sampled one isn’t, despite the sampled plot showing a larger visual trend? Within-game noise is much higher for sampled games.
game_slopes <- votes_binned %>%
group_by(app_id, source) %>%
filter(n() >= 5) %>%
summarize(
slope = coef(lm(mean_rating ~ mean_order, weights = n))[2],
n_bins = n(),
sd_rating = sd(mean_rating),
.groups = "drop"
)
game_slopes %>%
group_by(source) %>%
summarize(
n_games = n(),
mean_slope = mean(slope),
sd_slope = sd(slope),
median_slope = median(slope),
mean_within_sd = mean(sd_rating),
mean_bins = mean(n_bins),
.groups = "drop"
)
library(splines)
fe_rel_ns <- feols(mean_rating ~ ns(mean_centile, 4) | app_id, data = votes_binned, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
fe_abs_ns <- feols(mean_rating ~ ns(mean_order, 4) | app_id, data = votes_binned, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
fe_age_ns <- feols(mean_rating ~ ns(mean_age_years, 4) | app_id, data = votes_binned_age, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
summary(fe_rel_ns)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id)
## Estimate Std. Error t value Pr(>|t|)
## ns(mean_centile, 4)1 0.00470 0.00317 1.482 1.3864e-01
## ns(mean_centile, 4)2 -0.00184 0.00337 -0.546 5.8530e-01
## ns(mean_centile, 4)3 -0.00738 0.00536 -1.377 1.6887e-01
## ns(mean_centile, 4)4 -0.01465 0.00327 -4.481 8.2784e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045333 Adj. R2: 0.811147
## Within R2: 0.009858
summary(fe_abs_ns)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id)
## Estimate Std. Error t value Pr(>|t|)
## ns(mean_order, 4)1 0.000725 0.00333 0.217 8.2795e-01
## ns(mean_order, 4)2 -0.023596 0.00658 -3.586 3.5167e-04 ***
## ns(mean_order, 4)3 -0.044393 0.00600 -7.400 2.8812e-13 ***
## ns(mean_order, 4)4 -0.048554 0.00230 -21.116 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045457 Adj. R2: 0.810109
## Within R2: 0.004415
summary(fe_age_ns)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id)
## Estimate Std. Error t value Pr(>|t|)
## ns(mean_age_years, 4)1 -0.00957 0.00324 -2.95 0.0032567 **
## ns(mean_age_years, 4)2 -0.00928 0.00666 -1.39 0.1641186
## ns(mean_age_years, 4)3 -0.01375 0.00860 -1.60 0.1100888
## ns(mean_age_years, 4)4 -0.02328 0.01083 -2.15 0.0318862 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.04543 Adj. R2: 0.810338
## Within R2: 0.005618
# Pearson (log-transformed count) and Spearman correlations by source
game_summary %>%
left_join(target_games %>% select(app_id, total_reviews), by = "app_id") %>%
group_by(source) %>%
summarize(
pearson = cor(log10(total_reviews), mean_rating, method = "pearson"),
spearman = cor(total_reviews, mean_rating, method = "spearman"),
n = n(),
.groups = "drop"
)
Do early reviews predict the long-term rating? Or is there a recency bias that inflates initial scores?
# Compute 1-month and 5-year ratings for games with 5+ years of data
predict_data <- votes_ordered %>%
filter(app_id %in% games_5yr) %>%
group_by(app_id) %>%
mutate(days_since_first = (timestamp_created - min(timestamp_created)) / 86400) %>%
ungroup()
rating_1mo <- predict_data %>%
filter(days_since_first <= 30) %>%
group_by(app_id, source) %>%
summarize(rating_1mo = mean(voted_up), n_1mo = n(), .groups = "drop")
rating_5yr <- predict_data %>%
filter(days_since_first >= 4 * 365, days_since_first <= 5 * 365) %>%
group_by(app_id, source) %>%
summarize(rating_5yr = mean(voted_up), n_5yr = n(), .groups = "drop")
predict_df <- inner_join(rating_1mo, rating_5yr, by = c("app_id", "source")) %>%
filter(n_1mo >= 10, n_5yr >= 10)
fit_predict <- lm(rating_5yr ~ rating_1mo * source, data = predict_df)
summary(fit_predict)
##
## Call:
## lm(formula = rating_5yr ~ rating_1mo * source, data = predict_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5226 -0.0168 0.0077 0.0590 0.2681
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2945 0.0388 7.59 1.8e-13 ***
## rating_1mo 0.5872 0.0453 12.96 < 2e-16 ***
## sourcetop_rated 0.5888 0.4946 1.19 0.23
## rating_1mo:sourcetop_rated -0.5016 0.5067 -0.99 0.32
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.106 on 457 degrees of freedom
## Multiple R-squared: 0.496, Adjusted R-squared: 0.493
## F-statistic: 150 on 3 and 457 DF, p-value: <2e-16
predict_df %>%
ggplot(aes(rating_1mo, rating_5yr, color = source)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", aes(group = 1), color = "blue") +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", alpha = 0.5) +
scale_x_continuous(labels = scales::percent_format()) +
scale_y_continuous(labels = scales::percent_format()) +
labs(
x = "Rating after 1 month",
y = "Rating after 5 years",
title = "Predicting long-term rating from early reviews",
subtitle = glue("R² = {round(summary(fit_predict)$r.squared, 2)}, n = {nrow(predict_df)} games with 5+ years of data. Dashed = identity line.")
)
## [37m`geom_smooth()` using formula = 'y ~ x'[39m
The slope of ~0.59 (not 1.0) indicates regression to the mean. Since the FE models show no meaningful within-game rating change over time, this is pure statistical regression: early ratings are noisy estimates that converge to the true rating as more reviews accumulate. Unlike IMDB, there is no recency bias — early Steam reviews are noisy but unbiased.
write_sessioninfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Brussels
## tzcode source: system (glibc)
##
## attached base packages:
## [1] splines stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] arrow_23.0.1.1 modelsummary_2.5.0 fixest_0.13.2
## [4] httr2_1.2.1 btw_1.1.0 glue_1.8.0
## [7] lme4_1.1-37 Matrix_1.7-4 RSQLite_2.4.3
## [10] kirkegaard_2025-11-19 robustbase_0.99-6 psych_2.5.6
## [13] assertthat_0.2.1 weights_1.1.2 magrittr_2.0.4
## [16] lubridate_1.9.4 forcats_1.0.1 stringr_1.6.0
## [19] dplyr_1.1.4 purrr_1.2.0 readr_2.1.5
## [22] tidyr_1.3.1 tibble_3.3.0 ggplot2_4.0.1.9000
## [25] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.17.1 jsonlite_2.0.0
## [4] shape_1.4.6.1 datawizard_1.3.0 TH.data_1.1-4
## [7] estimability_1.5.1 jomo_2.7-6 farver_2.1.2
## [10] nloptr_2.2.1 rmarkdown_2.30 fs_1.6.6
## [13] ragg_1.5.0 vctrs_0.6.5 memoise_2.0.1
## [16] minqa_1.2.8 ellmer_0.4.0 effectsize_1.0.1
## [19] askpass_1.2.1 base64enc_0.1-3 htmltools_0.5.8.1
## [22] curl_7.0.0 broom_1.0.11 Formula_1.2-5
## [25] mitml_0.4-5 sass_0.4.10 parallelly_1.45.1
## [28] bslib_0.9.0 htmlwidgets_1.6.4 sandwich_3.1-1
## [31] emmeans_1.11.2-8 zoo_1.8-14 cachem_1.1.0
## [34] lifecycle_1.0.4 iterators_1.0.14 pkgconfig_2.0.3
## [37] webshot2_0.1.2 R6_2.6.1 fastmap_1.2.0
## [40] future_1.67.0 rbibutils_2.3 digest_0.6.39
## [43] numDeriv_2016.8-1.1 colorspace_2.1-2 ps_1.9.1
## [46] textshaping_1.0.4 Hmisc_5.2-4 labeling_0.4.3
## [49] tinytable_0.14.0 fansi_1.0.6 timechange_0.3.0
## [52] gdata_3.0.1 mgcv_1.9-3 compiler_4.5.2
## [55] bit64_4.6.0-1 withr_3.0.2 htmlTable_2.4.3
## [58] S7_0.2.1 backports_1.5.0 DBI_1.2.3
## [61] performance_0.15.2 pan_1.9 MASS_7.3-65
## [64] mcptools_0.2.0 openssl_2.3.4 rappdirs_0.3.3
## [67] gtools_3.9.5 chromote_0.5.1 tools_4.5.2
## [70] foreign_0.8-91 future.apply_1.20.0 nnet_7.3-20
## [73] nlme_3.1-168 stringmagic_1.2.0 promises_1.3.3
## [76] grid_4.5.2 rsconnect_1.5.1 checkmate_2.3.3
## [79] cluster_2.1.8.2 generics_0.1.4 gtable_0.3.6
## [82] tzdb_0.5.0 websocket_1.4.4 data.table_1.17.8
## [85] hms_1.1.3 utf8_1.2.6 tables_0.9.31
## [88] foreach_1.5.2 pillar_1.11.1 vroom_1.6.6
## [91] later_1.4.4 lattice_0.22-7 survival_3.8-6
## [94] bit_4.6.0 dreamerr_1.5.0 tidyselect_1.2.1
## [97] coro_1.1.0 knitr_1.50 reformulas_0.4.1
## [100] gridExtra_2.3
## [ reached 'max' / getOption("max.print") -- omitted 34 entries ]