Init

library(kirkegaard)
library(arrow)
library(lme4)
library(glue)

theme_set(theme_bw())
options(digits = 3)

if (F) {
  library(btw)
  btw::btw_mcp_session()
}

Data

target_top <- read_csv("data/steamdb_top_rated.csv") %>%
  mutate(app_id = as.integer(app_id), source = "top_rated")
## Rows: 500 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): name
## dbl (4): rank, app_id, rating_pct, total_reviews
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
target_sampled <- read_csv("data/sampled_games.csv") %>%
  mutate(app_id = as.integer(app_id), source = "sampled")
## Rows: 500 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): name
## dbl (4): app_id, total_reviews, positive, rating_pct
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
target_games <- bind_rows(
  target_top %>% select(app_id, name, rating_pct, total_reviews, source),
  target_sampled %>% select(app_id, name, rating_pct, total_reviews, source)
)

# read scraped reviews from parquet
votes <- read_parquet("data/steam_review_votes.parquet") %>%
  as_tibble() %>%
  mutate(
    app_id = as.integer(app_id),
    voted_up = as.logical(voted_up)
  )

votes

Data integrity

n_total <- nrow(votes)
n_unique <- n_distinct(votes$recommendationid)
cat("Total rows:", n_total, "\n")
## Total rows: 16023234
cat("Unique recommendation IDs:", n_unique, "\n")
## Unique recommendation IDs: 16023234
cat("Duplicates:", n_total - n_unique, "\n")
## Duplicates: 0

Per-game summary

game_summary <- votes %>%
  group_by(app_id) %>%
  summarize(
    n_reviews = n(),
    mean_rating = mean(voted_up),
    .groups = "drop"
  ) %>%
  left_join(target_games %>% select(app_id, name, rating_pct, source), by = "app_id") %>%
  arrange(desc(n_reviews))

game_summary
# distribution of review counts by source
target_games %>%
  ggplot(aes(total_reviews, fill = source)) +
  geom_histogram(alpha = 0.7, position = "identity", bins = 40) +
  scale_x_log10(labels = scales::comma) +
  labs(
    x = "Total reviews (log scale)",
    y = "Count",
    title = "Distribution of review counts",
    subtitle = glue("{nrow(target_games)} games (500 top-rated + 500 sampled)")
  )

# cross-sectional: rating vs review count by source
game_summary %>%
  left_join(target_games %>% select(app_id, total_reviews), by = "app_id") %>%
  ggplot(aes(total_reviews, mean_rating, color = source)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  scale_x_log10(labels = scales::comma) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Total reviews (log scale)",
    y = "Mean positive rating",
    title = "Rating vs review count across games",
    subtitle = glue("{nrow(game_summary)} games")
  )
## `geom_smooth()` using formula = 'y ~ x'

Within-game rating trajectory

Do games get rated more or less positively over their review history?

# add within-game review order and centile
votes_ordered <- votes %>%
  left_join(target_games %>% select(app_id, source), by = "app_id") %>%
  group_by(app_id) %>%
  arrange(timestamp_created, .by_group = TRUE) %>%
  mutate(
    review_order = row_number(),
    review_centile = review_order / n()
  ) %>%
  ungroup()

Bin into windows for modeling

# bin every 100 reviews for efficiency
votes_binned <- votes_ordered %>%
  group_by(app_id) %>%
  mutate(bin = (review_order - 1) %/% 100 + 1) %>%
  group_by(app_id, bin) %>%
  summarize(
    n = n(),
    mean_rating = mean(voted_up),
    mean_centile = mean(review_centile),
    mean_order = mean(review_order),
    source = first(source),
    .groups = "drop"
  )

votes_binned

Model: absolute review count

Does rating change as a function of how many reviews a game has accumulated?

fit_abs <- lmer(mean_rating ~ scale(mean_order) * source + (1 | app_id), data = votes_binned, weights = n)
summary(fit_abs)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mean_rating ~ scale(mean_order) * source + (1 | app_id)
##    Data: votes_binned
## Weights: n
## 
## REML criterion at convergence: -530062
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -18.037  -0.315   0.068   0.394  10.997 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  app_id   (Intercept) 0.0087   0.0933  
##  Residual             0.2073   0.4553  
## Number of obs: 160657, groups:  app_id, 1000
## 
## Fixed effects:
##                                    Estimate Std. Error t value
## (Intercept)                        0.817040   0.004202  194.43
## scale(mean_order)                  0.003051   0.000440    6.94
## sourcetop_rated                    0.153551   0.005927   25.91
## scale(mean_order):sourcetop_rated -0.007691   0.000472  -16.29
## 
## Correlation of Fixed Effects:
##             (Intr) scl(_) srctp_
## scl(mn_rdr)  0.081              
## sourctp_rtd -0.709 -0.057       
## scl(mn_r):_ -0.075 -0.931  0.057

Model: relative position (centile)

Does rating change as a function of where in the game’s review history we are?

fit_rel <- lmer(mean_rating ~ mean_centile * source + (1 | app_id), data = votes_binned, weights = n)
summary(fit_rel)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mean_rating ~ mean_centile * source + (1 | app_id)
##    Data: votes_binned
## Weights: n
## 
## REML criterion at convergence: -530149
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -18.143  -0.312   0.068   0.392  11.092 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  app_id   (Intercept) 0.00872  0.0934  
##  Residual             0.20719  0.4552  
## Number of obs: 160657, groups:  app_id, 1000
## 
## Fixed effects:
##                               Estimate Std. Error t value
## (Intercept)                   0.818542   0.004211  194.40
## mean_centile                 -0.007717   0.000766  -10.07
## sourcetop_rated               0.160055   0.005941   26.94
## mean_centile:sourcetop_rated -0.004901   0.000894   -5.49
## 
## Correlation of Fixed Effects:
##             (Intr) mn_cnt srctp_
## mean_centil -0.091              
## sourctp_rtd -0.709  0.065       
## mn_cntl:sr_  0.078 -0.858 -0.075

Visualize trajectories

# average rating trajectory by source
votes_ordered %>%
  mutate(centile_bin = round(review_centile, 2)) %>%
  group_by(centile_bin, source) %>%
  summarize(
    mean_rating = mean(voted_up),
    n = n(),
    .groups = "drop"
  ) %>%
  ggplot(aes(centile_bin, mean_rating, color = source)) +
  geom_line() +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Review centile (0 = earliest, 1 = latest)",
    y = "Mean positive rating",
    title = "Average rating trajectory by source"
  )

# limit to games with 5+ years of review history, plot first 5 years
game_age <- votes_ordered %>%
  group_by(app_id) %>%
  summarize(age_days = (max(timestamp_created) - min(timestamp_created)) / 86400, .groups = "drop")
games_5yr <- game_age %>% filter(age_days >= 5 * 365) %>% pull(app_id)

votes_ordered %>%
  filter(app_id %in% games_5yr) %>%
  group_by(app_id) %>%
  mutate(days_since_first = (timestamp_created - min(timestamp_created)) / 86400) %>%
  ungroup() %>%
  filter(days_since_first <= 5 * 365) %>%
  mutate(age_bin = floor(days_since_first / 30) * 30) %>%
  group_by(age_bin, source) %>%
  summarize(
    mean_rating = mean(voted_up),
    n = n(),
    .groups = "drop"
  ) %>%
  ggplot(aes(age_bin / 365, mean_rating, color = source)) +
  geom_line() +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Years since first review",
    y = "Mean positive rating",
    title = "Rating trajectory by game age",
    subtitle = glue("{length(games_5yr)} games with 5+ years of review history")
  )

# limit to games with 10k+ reviews, plot first 10k
games_10k <- game_summary %>% filter(n_reviews >= 10000) %>% pull(app_id)

votes_ordered %>%
  filter(app_id %in% games_10k, review_order <= 10000) %>%
  mutate(count_bin = (review_order - 1) %/% 200 + 1) %>%
  group_by(count_bin, source) %>%
  summarize(
    mean_rating = mean(voted_up),
    n_games = n_distinct(app_id),
    mean_order = mean(review_order),
    .groups = "drop"
  ) %>%
  ggplot(aes(mean_order, mean_rating, color = source)) +
  geom_line() +
  scale_x_continuous(labels = scales::comma) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Review count",
    y = "Mean positive rating",
    title = "Rating trajectory by review count",
    subtitle = glue("{length(games_10k)} games with 10k+ reviews")
  )

# individual game trajectories (top reviewed games)
top_games <- game_summary %>% slice_max(n_reviews, n = 20)

votes_ordered %>%
  filter(app_id %in% top_games$app_id) %>%
  mutate(centile_bin = round(review_centile * 20) / 20) %>%
  group_by(app_id, centile_bin) %>%
  summarize(mean_rating = mean(voted_up), .groups = "drop") %>%
  left_join(target_games %>% select(app_id, name), by = "app_id") %>%
  ggplot(aes(centile_bin, mean_rating, color = name)) +
  geom_line(alpha = 0.7) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Review centile",
    y = "Mean positive rating",
    title = "Rating trajectories for top reviewed games"
  ) +
  theme(legend.position = "right")

Fixed effects models (fixest)

Game fixed effects with clustered standard errors — more conservative than lmer random effects.

Source main effect is absorbed by game FE. We split models by source to test for differential effects cleanly.

library(fixest)

# Age predictor for binned data
votes_binned_age <- votes_ordered %>%
  group_by(app_id) %>%
  mutate(days_since_first = (timestamp_created - min(timestamp_created)) / 86400) %>%
  ungroup() %>%
  group_by(app_id) %>%
  mutate(bin = (review_order - 1) %/% 100 + 1) %>%
  group_by(app_id, bin) %>%
  summarize(
    n = n(),
    mean_rating = mean(voted_up),
    mean_age_years = mean(days_since_first) / 365,
    source = first(source),
    .groups = "drop"
  )
# Pooled (all games)
fe_rel <- feols(mean_rating ~ mean_centile | app_id, data = votes_binned, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
fe_abs <- feols(mean_rating ~ scale(mean_order) | app_id, data = votes_binned, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
fe_age <- feols(mean_rating ~ mean_age_years | app_id, data = votes_binned_age, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
summary(fe_rel)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id) 
##              Estimate Std. Error t value   Pr(>|t|)    
## mean_centile  -0.0113    0.00286   -3.96 7.9223e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045441     Adj. R2: 0.810252
##                  Within R2: 0.005147
summary(fe_abs)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id) 
##                   Estimate Std. Error t value   Pr(>|t|)    
## scale(mean_order) -0.00361    0.00104   -3.47 0.00054022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045486     Adj. R2: 0.809873
##                  Within R2: 0.003157
summary(fe_age)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id) 
##                Estimate Std. Error t value  Pr(>|t|)    
## mean_age_years -0.00165   0.000533   -3.09 0.0020894 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045477     Adj. R2: 0.809952
##                  Within R2: 0.003576
# Split by source: top rated
bv_top <- votes_binned %>% filter(source == "top_rated")
ba_top <- votes_binned_age %>% filter(source == "top_rated")

fe_rel_top <- feols(mean_rating ~ mean_centile | app_id, data = bv_top, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
fe_abs_top <- feols(mean_rating ~ scale(mean_order) | app_id, data = bv_top, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
fe_age_top <- feols(mean_rating ~ mean_age_years | app_id, data = ba_top, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
summary(fe_rel_top)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 118,070
## Weights: n
## Fixed-effects: app_id: 497
## Standard-errors: Clustered (app_id) 
##              Estimate Std. Error t value  Pr(>|t|)    
## mean_centile  -0.0126    0.00135   -9.32 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.024652     Adj. R2: 0.231137
##                  Within R2: 0.021364
summary(fe_abs_top)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 118,070
## Weights: n
## Fixed-effects: app_id: 497
## Standard-errors: Clustered (app_id) 
##                   Estimate Std. Error t value  Pr(>|t|)    
## scale(mean_order) -0.00494   0.000444   -11.1 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.024663     Adj. R2: 0.230426
##                  Within R2: 0.020459
summary(fe_age_top)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 118,070
## Weights: n
## Fixed-effects: app_id: 497
## Standard-errors: Clustered (app_id) 
##                Estimate Std. Error t value   Pr(>|t|)    
## mean_age_years -0.00223   0.000348    -6.4 3.5217e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.024692     Adj. R2: 0.228609
##                  Within R2: 0.018146
# Split by source: sampled
bv_sam <- votes_binned %>% filter(source == "sampled")
ba_sam <- votes_binned_age %>% filter(source == "sampled")

fe_rel_sam <- feols(mean_rating ~ mean_centile | app_id, data = bv_sam, weights = ~n, vcov = "cluster")
fe_abs_sam <- feols(mean_rating ~ scale(mean_order) | app_id, data = bv_sam, weights = ~n, vcov = "cluster")
fe_age_sam <- feols(mean_rating ~ mean_age_years | app_id, data = ba_sam, weights = ~n, vcov = "cluster")

summary(fe_rel_sam)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 42,584
## Weights: n
## Fixed-effects: app_id: 500
## Standard-errors: Clustered (app_id) 
##              Estimate Std. Error t value Pr(>|t|) 
## mean_centile -0.00772     0.0101  -0.764   0.4451 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.078235     Adj. R2: 0.741815
##                  Within R2: 8.097e-4
summary(fe_abs_sam)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 42,584
## Weights: n
## Fixed-effects: app_id: 500
## Standard-errors: Clustered (app_id) 
##                   Estimate Std. Error t value Pr(>|t|) 
## scale(mean_order)  0.00216    0.00501   0.431  0.66677 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.078252     Adj. R2: 0.741705
##                  Within R2: 3.805e-4
summary(fe_age_sam)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 42,584
## Weights: n
## Fixed-effects: app_id: 500
## Standard-errors: Clustered (app_id) 
##                 Estimate Std. Error t value Pr(>|t|) 
## mean_age_years -0.000737    0.00121   -0.61  0.54247 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.078253     Adj. R2: 0.741699
##                  Within R2: 3.579e-4

Summary table

library(modelsummary)

models <- list(
  "Top: Centile" = fe_rel_top,
  "Top: Count" = fe_abs_top,
  "Top: Age" = fe_age_top,
  "Sampled: Centile" = fe_rel_sam,
  "Sampled: Count" = fe_abs_sam,
  "Sampled: Age" = fe_age_sam
)

modelsummary(
  models,
  stars = c('*' = .05, '**' = .01, '***' = .001),
  gof_map = c("nobs", "r2.within", "FE: app_id"),
  title = "Fixed effects models of within-game rating trajectories"
)
Fixed effects models of within-game rating trajectories
Top: Centile Top: Count Top: Age Sampled: Centile Sampled: Count Sampled: Age
* p < 0.05, ** p < 0.01, *** p < 0.001
mean_centile -0.013*** -0.008
(0.001) (0.010)
scale(mean_order) -0.005*** 0.002
(0.000) (0.005)
mean_age_years -0.002*** -0.001
(0.000) (0.001)
Num.Obs. 118070 118070 118070 42584 42584 42584
R2 Within 0.021 0.020 0.018 0.001 0.000 0.000
FE: app_id X X X X X X
modelsummary(
  models,
  stars = c('*' = .05, '**' = .01, '***' = .001),
  gof_map = c("nobs", "r2.within", "FE: app_id"),
  output = "figs/fe_models_table.png",
  title = "Fixed effects models of within-game rating trajectories"
)

Per-game slope diagnostics

Why is the top-rated slope significant but the sampled one isn’t, despite the sampled plot showing a larger visual trend? Within-game noise is much higher for sampled games.

game_slopes <- votes_binned %>%
  group_by(app_id, source) %>%
  filter(n() >= 5) %>%
  summarize(
    slope = coef(lm(mean_rating ~ mean_order, weights = n))[2],
    n_bins = n(),
    sd_rating = sd(mean_rating),
    .groups = "drop"
  )

game_slopes %>%
  group_by(source) %>%
  summarize(
    n_games = n(),
    mean_slope = mean(slope),
    sd_slope = sd(slope),
    median_slope = median(slope),
    mean_within_sd = mean(sd_rating),
    mean_bins = mean(n_bins),
    .groups = "drop"
  )

Nonlinear (spline) models

library(splines)

fe_rel_ns <- feols(mean_rating ~ ns(mean_centile, 4) | app_id, data = votes_binned, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
fe_abs_ns <- feols(mean_rating ~ ns(mean_order, 4) | app_id, data = votes_binned, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
fe_age_ns <- feols(mean_rating ~ ns(mean_age_years, 4) | app_id, data = votes_binned_age, weights = ~n, vcov = "cluster")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
summary(fe_rel_ns)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id) 
##                      Estimate Std. Error t value   Pr(>|t|)    
## ns(mean_centile, 4)1  0.00470    0.00317   1.482 1.3864e-01    
## ns(mean_centile, 4)2 -0.00184    0.00337  -0.546 5.8530e-01    
## ns(mean_centile, 4)3 -0.00738    0.00536  -1.377 1.6887e-01    
## ns(mean_centile, 4)4 -0.01465    0.00327  -4.481 8.2784e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045333     Adj. R2: 0.811147
##                  Within R2: 0.009858
summary(fe_abs_ns)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id) 
##                     Estimate Std. Error t value   Pr(>|t|)    
## ns(mean_order, 4)1  0.000725    0.00333   0.217 8.2795e-01    
## ns(mean_order, 4)2 -0.023596    0.00658  -3.586 3.5167e-04 ***
## ns(mean_order, 4)3 -0.044393    0.00600  -7.400 2.8812e-13 ***
## ns(mean_order, 4)4 -0.048554    0.00230 -21.116  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045457     Adj. R2: 0.810109
##                  Within R2: 0.004415
summary(fe_age_ns)
## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id) 
##                        Estimate Std. Error t value  Pr(>|t|)    
## ns(mean_age_years, 4)1 -0.00957    0.00324   -2.95 0.0032567 ** 
## ns(mean_age_years, 4)2 -0.00928    0.00666   -1.39 0.1641186    
## ns(mean_age_years, 4)3 -0.01375    0.00860   -1.60 0.1100888    
## ns(mean_age_years, 4)4 -0.02328    0.01083   -2.15 0.0318862 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.04543     Adj. R2: 0.810338
##                 Within R2: 0.005618

Cross-sectional correlations

# Pearson (log-transformed count) and Spearman correlations by source
game_summary %>%
  left_join(target_games %>% select(app_id, total_reviews), by = "app_id") %>%
  group_by(source) %>%
  summarize(
    pearson = cor(log10(total_reviews), mean_rating, method = "pearson"),
    spearman = cor(total_reviews, mean_rating, method = "spearman"),
    n = n(),
    .groups = "drop"
  )

Predicting long-term rating from early reviews

Do early reviews predict the long-term rating? Or is there a recency bias that inflates initial scores?

# Compute 1-month and 5-year ratings for games with 5+ years of data
predict_data <- votes_ordered %>%
  filter(app_id %in% games_5yr) %>%
  group_by(app_id) %>%
  mutate(days_since_first = (timestamp_created - min(timestamp_created)) / 86400) %>%
  ungroup()

rating_1mo <- predict_data %>%
  filter(days_since_first <= 30) %>%
  group_by(app_id, source) %>%
  summarize(rating_1mo = mean(voted_up), n_1mo = n(), .groups = "drop")

rating_5yr <- predict_data %>%
  filter(days_since_first >= 4 * 365, days_since_first <= 5 * 365) %>%
  group_by(app_id, source) %>%
  summarize(rating_5yr = mean(voted_up), n_5yr = n(), .groups = "drop")

predict_df <- inner_join(rating_1mo, rating_5yr, by = c("app_id", "source")) %>%
  filter(n_1mo >= 10, n_5yr >= 10)
fit_predict <- lm(rating_5yr ~ rating_1mo * source, data = predict_df)
summary(fit_predict)
## 
## Call:
## lm(formula = rating_5yr ~ rating_1mo * source, data = predict_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5226 -0.0168  0.0077  0.0590  0.2681 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  0.2945     0.0388    7.59  1.8e-13 ***
## rating_1mo                   0.5872     0.0453   12.96  < 2e-16 ***
## sourcetop_rated              0.5888     0.4946    1.19     0.23    
## rating_1mo:sourcetop_rated  -0.5016     0.5067   -0.99     0.32    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.106 on 457 degrees of freedom
## Multiple R-squared:  0.496,  Adjusted R-squared:  0.493 
## F-statistic:  150 on 3 and 457 DF,  p-value: <2e-16
predict_df %>%
  ggplot(aes(rating_1mo, rating_5yr, color = source)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", aes(group = 1), color = "blue") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", alpha = 0.5) +
  scale_x_continuous(labels = scales::percent_format()) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Rating after 1 month",
    y = "Rating after 5 years",
    title = "Predicting long-term rating from early reviews",
    subtitle = glue("R² = {round(summary(fit_predict)$r.squared, 2)}, n = {nrow(predict_df)} games with 5+ years of data. Dashed = identity line.")
  )
## `geom_smooth()` using formula = 'y ~ x'

The slope of ~0.59 (not 1.0) indicates regression to the mean. Since the FE models show no meaningful within-game rating change over time, this is pure statistical regression: early ratings are noisy estimates that converge to the true rating as more reviews accumulate. Unlike IMDB, there is no recency bias — early Steam reviews are noisy but unbiased.

Meta

write_sessioninfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] splines   stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] arrow_23.0.1.1        modelsummary_2.5.0    fixest_0.13.2        
##  [4] httr2_1.2.1           btw_1.1.0             glue_1.8.0           
##  [7] lme4_1.1-37           Matrix_1.7-4          RSQLite_2.4.3        
## [10] kirkegaard_2025-11-19 robustbase_0.99-6     psych_2.5.6          
## [13] assertthat_0.2.1      weights_1.1.2         magrittr_2.0.4       
## [16] lubridate_1.9.4       forcats_1.0.1         stringr_1.6.0        
## [19] dplyr_1.1.4           purrr_1.2.0           readr_2.1.5          
## [22] tidyr_1.3.1           tibble_3.3.0          ggplot2_4.0.1.9000   
## [25] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3  rstudioapi_0.17.1   jsonlite_2.0.0     
##   [4] shape_1.4.6.1       datawizard_1.3.0    TH.data_1.1-4      
##   [7] estimability_1.5.1  jomo_2.7-6          farver_2.1.2       
##  [10] nloptr_2.2.1        rmarkdown_2.30      fs_1.6.6           
##  [13] ragg_1.5.0          vctrs_0.6.5         memoise_2.0.1      
##  [16] minqa_1.2.8         ellmer_0.4.0        effectsize_1.0.1   
##  [19] askpass_1.2.1       base64enc_0.1-3     htmltools_0.5.8.1  
##  [22] curl_7.0.0          broom_1.0.11        Formula_1.2-5      
##  [25] mitml_0.4-5         sass_0.4.10         parallelly_1.45.1  
##  [28] bslib_0.9.0         htmlwidgets_1.6.4   sandwich_3.1-1     
##  [31] emmeans_1.11.2-8    zoo_1.8-14          cachem_1.1.0       
##  [34] lifecycle_1.0.4     iterators_1.0.14    pkgconfig_2.0.3    
##  [37] webshot2_0.1.2      R6_2.6.1            fastmap_1.2.0      
##  [40] future_1.67.0       rbibutils_2.3       digest_0.6.39      
##  [43] numDeriv_2016.8-1.1 colorspace_2.1-2    ps_1.9.1           
##  [46] textshaping_1.0.4   Hmisc_5.2-4         labeling_0.4.3     
##  [49] tinytable_0.14.0    fansi_1.0.6         timechange_0.3.0   
##  [52] gdata_3.0.1         mgcv_1.9-3          compiler_4.5.2     
##  [55] bit64_4.6.0-1       withr_3.0.2         htmlTable_2.4.3    
##  [58] S7_0.2.1            backports_1.5.0     DBI_1.2.3          
##  [61] performance_0.15.2  pan_1.9             MASS_7.3-65        
##  [64] mcptools_0.2.0      openssl_2.3.4       rappdirs_0.3.3     
##  [67] gtools_3.9.5        chromote_0.5.1      tools_4.5.2        
##  [70] foreign_0.8-91      future.apply_1.20.0 nnet_7.3-20        
##  [73] nlme_3.1-168        stringmagic_1.2.0   promises_1.3.3     
##  [76] grid_4.5.2          rsconnect_1.5.1     checkmate_2.3.3    
##  [79] cluster_2.1.8.2     generics_0.1.4      gtable_0.3.6       
##  [82] tzdb_0.5.0          websocket_1.4.4     data.table_1.17.8  
##  [85] hms_1.1.3           utf8_1.2.6          tables_0.9.31      
##  [88] foreach_1.5.2       pillar_1.11.1       vroom_1.6.6        
##  [91] later_1.4.4         lattice_0.22-7      survival_3.8-6     
##  [94] bit_4.6.0           dreamerr_1.5.0      tidyselect_1.2.1   
##  [97] coro_1.1.0          knitr_1.50          reformulas_0.4.1   
## [100] gridExtra_2.3      
##  [ reached 'max' / getOption("max.print") -- omitted 34 entries ]