Init

library(kirkegaard)
library(arrow)
library(lme4)
library(glue)

theme_set(theme_bw())
options(digits = 3)

if (F) {
  library(btw)
  btw::btw_mcp_session()
}

Data

target_top <- read_csv("data/steamdb_top_rated.csv") %>%
  mutate(app_id = as.integer(app_id), source = "top_rated")

## [1mRows: [22m[34m500[39m [1mColumns: [22m[34m5[39m
## [36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
## [1mDelimiter:[22m ","
## [31mchr[39m (1): name
## [32mdbl[39m (4): rank, app_id, rating_pct, total_reviews
## 
## [36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
## [36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.

target_sampled <- read_csv("data/sampled_games.csv") %>%
  mutate(app_id = as.integer(app_id), source = "sampled")

## [1mRows: [22m[34m500[39m [1mColumns: [22m[34m5[39m
## [36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
## [1mDelimiter:[22m ","
## [31mchr[39m (1): name
## [32mdbl[39m (4): app_id, total_reviews, positive, rating_pct
## 
## [36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
## [36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.

target_games <- bind_rows(
  target_top %>% select(app_id, name, rating_pct, total_reviews, source),
  target_sampled %>% select(app_id, name, rating_pct, total_reviews, source)
)

# read scraped reviews from parquet
votes <- read_parquet("data/steam_review_votes.parquet") %>%
  as_tibble() %>%
  mutate(
    app_id = as.integer(app_id),
    voted_up = as.logical(voted_up)
  )

votes

Data integrity

n_total <- nrow(votes)
n_unique <- n_distinct(votes$recommendationid)
cat("Total rows:", n_total, "\n")

## Total rows: 16023234

cat("Unique recommendation IDs:", n_unique, "\n")

## Unique recommendation IDs: 16023234

cat("Duplicates:", n_total - n_unique, "\n")

## Duplicates: 0

Per-game summary

game_summary <- votes %>%
  group_by(app_id) %>%
  summarize(
    n_reviews = n(),
    mean_rating = mean(voted_up),
    .groups = "drop"
  ) %>%
  left_join(target_games %>% select(app_id, name, rating_pct, source), by = "app_id") %>%
  arrange(desc(n_reviews))

game_summary

# distribution of review counts by source
target_games %>%
  ggplot(aes(total_reviews, fill = source)) +
  geom_histogram(alpha = 0.7, position = "identity", bins = 40) +
  scale_x_log10(labels = scales::comma) +
  labs(
    x = "Total reviews (log scale)",
    y = "Count",
    title = "Distribution of review counts",
    subtitle = glue("{nrow(target_games)} games (500 top-rated + 500 sampled)")
  )

# cross-sectional: rating vs review count by source
game_summary %>%
  left_join(target_games %>% select(app_id, total_reviews), by = "app_id") %>%
  ggplot(aes(total_reviews, mean_rating, color = source)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  scale_x_log10(labels = scales::comma) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Total reviews (log scale)",
    y = "Mean positive rating",
    title = "Rating vs review count across games",
    subtitle = glue("{nrow(game_summary)} games")
  )

## [37m`geom_smooth()` using formula = 'y ~ x'[39m

Within-game rating trajectory

Do games get rated more or less positively over their review history?

# add within-game review order and centile
votes_ordered <- votes %>%
  left_join(target_games %>% select(app_id, source), by = "app_id") %>%
  group_by(app_id) %>%
  arrange(timestamp_created, .by_group = TRUE) %>%
  mutate(
    review_order = row_number(),
    review_centile = review_order / n()
  ) %>%
  ungroup()

Bin into windows for modeling

# bin every 100 reviews for efficiency
votes_binned <- votes_ordered %>%
  group_by(app_id) %>%
  mutate(bin = (review_order - 1) %/% 100 + 1) %>%
  group_by(app_id, bin) %>%
  summarize(
    n = n(),
    mean_rating = mean(voted_up),
    mean_centile = mean(review_centile),
    mean_order = mean(review_order),
    source = first(source),
    .groups = "drop"
  )

votes_binned

Model: absolute review count

Does rating change as a function of how many reviews a game has accumulated?

fit_abs <- lmer(mean_rating ~ scale(mean_order) * source + (1 | app_id), data = votes_binned, weights = n)
summary(fit_abs)

## Linear mixed model fit by REML ['lmerMod']
## Formula: mean_rating ~ scale(mean_order) * source + (1 | app_id)
##    Data: votes_binned
## Weights: n
## 
## REML criterion at convergence: -530062
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -18.037  -0.315   0.068   0.394  10.997 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  app_id   (Intercept) 0.0087   0.0933  
##  Residual             0.2073   0.4553  
## Number of obs: 160657, groups:  app_id, 1000
## 
## Fixed effects:
##                                    Estimate Std. Error t value
## (Intercept)                        0.817040   0.004202  194.43
## scale(mean_order)                  0.003051   0.000440    6.94
## sourcetop_rated                    0.153551   0.005927   25.91
## scale(mean_order):sourcetop_rated -0.007691   0.000472  -16.29
## 
## Correlation of Fixed Effects:
##             (Intr) scl(_) srctp_
## scl(mn_rdr)  0.081              
## sourctp_rtd -0.709 -0.057       
## scl(mn_r):_ -0.075 -0.931  0.057

Model: relative position (centile)

Does rating change as a function of where in the game’s review history we are?

fit_rel <- lmer(mean_rating ~ mean_centile * source + (1 | app_id), data = votes_binned, weights = n)
summary(fit_rel)

## Linear mixed model fit by REML ['lmerMod']
## Formula: mean_rating ~ mean_centile * source + (1 | app_id)
##    Data: votes_binned
## Weights: n
## 
## REML criterion at convergence: -530149
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -18.143  -0.312   0.068   0.392  11.092 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  app_id   (Intercept) 0.00872  0.0934  
##  Residual             0.20719  0.4552  
## Number of obs: 160657, groups:  app_id, 1000
## 
## Fixed effects:
##                               Estimate Std. Error t value
## (Intercept)                   0.818542   0.004211  194.40
## mean_centile                 -0.007717   0.000766  -10.07
## sourcetop_rated               0.160055   0.005941   26.94
## mean_centile:sourcetop_rated -0.004901   0.000894   -5.49
## 
## Correlation of Fixed Effects:
##             (Intr) mn_cnt srctp_
## mean_centil -0.091              
## sourctp_rtd -0.709  0.065       
## mn_cntl:sr_  0.078 -0.858 -0.075

Visualize trajectories

# average rating trajectory by source
votes_ordered %>%
  mutate(centile_bin = round(review_centile, 2)) %>%
  group_by(centile_bin, source) %>%
  summarize(
    mean_rating = mean(voted_up),
    n = n(),
    .groups = "drop"
  ) %>%
  ggplot(aes(centile_bin, mean_rating, color = source)) +
  geom_line() +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Review centile (0 = earliest, 1 = latest)",
    y = "Mean positive rating",
    title = "Average rating trajectory by source"
  )

# limit to games with 5+ years of review history, plot first 5 years
game_age <- votes_ordered %>%
  group_by(app_id) %>%
  summarize(age_days = (max(timestamp_created) - min(timestamp_created)) / 86400, .groups = "drop")
games_5yr <- game_age %>% filter(age_days >= 5 * 365) %>% pull(app_id)

votes_ordered %>%
  filter(app_id %in% games_5yr) %>%
  group_by(app_id) %>%
  mutate(days_since_first = (timestamp_created - min(timestamp_created)) / 86400) %>%
  ungroup() %>%
  filter(days_since_first <= 5 * 365) %>%
  mutate(age_bin = floor(days_since_first / 30) * 30) %>%
  group_by(age_bin, source) %>%
  summarize(
    mean_rating = mean(voted_up),
    n = n(),
    .groups = "drop"
  ) %>%
  ggplot(aes(age_bin / 365, mean_rating, color = source)) +
  geom_line() +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Years since first review",
    y = "Mean positive rating",
    title = "Rating trajectory by game age",
    subtitle = glue("{length(games_5yr)} games with 5+ years of review history")
  )

# limit to games with 10k+ reviews, plot first 10k
games_10k <- game_summary %>% filter(n_reviews >= 10000) %>% pull(app_id)

votes_ordered %>%
  filter(app_id %in% games_10k, review_order <= 10000) %>%
  mutate(count_bin = (review_order - 1) %/% 200 + 1) %>%
  group_by(count_bin, source) %>%
  summarize(
    mean_rating = mean(voted_up),
    n_games = n_distinct(app_id),
    mean_order = mean(review_order),
    .groups = "drop"
  ) %>%
  ggplot(aes(mean_order, mean_rating, color = source)) +
  geom_line() +
  scale_x_continuous(labels = scales::comma) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Review count",
    y = "Mean positive rating",
    title = "Rating trajectory by review count",
    subtitle = glue("{length(games_10k)} games with 10k+ reviews")
  )

# individual game trajectories (top reviewed games)
top_games <- game_summary %>% slice_max(n_reviews, n = 20)

votes_ordered %>%
  filter(app_id %in% top_games$app_id) %>%
  mutate(centile_bin = round(review_centile * 20) / 20) %>%
  group_by(app_id, centile_bin) %>%
  summarize(mean_rating = mean(voted_up), .groups = "drop") %>%
  left_join(target_games %>% select(app_id, name), by = "app_id") %>%
  ggplot(aes(centile_bin, mean_rating, color = name)) +
  geom_line(alpha = 0.7) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Review centile",
    y = "Mean positive rating",
    title = "Rating trajectories for top reviewed games"
  ) +
  theme(legend.position = "right")

Fixed effects models (fixest)

Game fixed effects with clustered standard errors — more conservative than lmer random effects.

Source main effect is absorbed by game FE. We split models by source to test for differential effects cleanly.

library(fixest)

# Age predictor for binned data
votes_binned_age <- votes_ordered %>%
  group_by(app_id) %>%
  mutate(days_since_first = (timestamp_created - min(timestamp_created)) / 86400) %>%
  ungroup() %>%
  group_by(app_id) %>%
  mutate(bin = (review_order - 1) %/% 100 + 1) %>%
  group_by(app_id, bin) %>%
  summarize(
    n = n(),
    mean_rating = mean(voted_up),
    mean_age_years = mean(days_since_first) / 365,
    source = first(source),
    .groups = "drop"
  )

# Pooled (all games)
fe_rel <- feols(mean_rating ~ mean_centile | app_id, data = votes_binned, weights = ~n, vcov = "cluster")

## NOTE: 3 fixed-effect singletons were removed (3 observations).

fe_abs <- feols(mean_rating ~ scale(mean_order) | app_id, data = votes_binned, weights = ~n, vcov = "cluster")

## NOTE: 3 fixed-effect singletons were removed (3 observations).

fe_age <- feols(mean_rating ~ mean_age_years | app_id, data = votes_binned_age, weights = ~n, vcov = "cluster")

## NOTE: 3 fixed-effect singletons were removed (3 observations).

summary(fe_rel)

## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id) 
##              Estimate Std. Error t value   Pr(>|t|)    
## mean_centile  -0.0113    0.00286   -3.96 7.9223e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045441     Adj. R2: 0.810252
##                  Within R2: 0.005147

summary(fe_abs)

## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id) 
##                   Estimate Std. Error t value   Pr(>|t|)    
## scale(mean_order) -0.00361    0.00104   -3.47 0.00054022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045486     Adj. R2: 0.809873
##                  Within R2: 0.003157

summary(fe_age)

## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id) 
##                Estimate Std. Error t value  Pr(>|t|)    
## mean_age_years -0.00165   0.000533   -3.09 0.0020894 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045477     Adj. R2: 0.809952
##                  Within R2: 0.003576

# Split by source: top rated
bv_top <- votes_binned %>% filter(source == "top_rated")
ba_top <- votes_binned_age %>% filter(source == "top_rated")

fe_rel_top <- feols(mean_rating ~ mean_centile | app_id, data = bv_top, weights = ~n, vcov = "cluster")

## NOTE: 3 fixed-effect singletons were removed (3 observations).

fe_abs_top <- feols(mean_rating ~ scale(mean_order) | app_id, data = bv_top, weights = ~n, vcov = "cluster")

## NOTE: 3 fixed-effect singletons were removed (3 observations).

fe_age_top <- feols(mean_rating ~ mean_age_years | app_id, data = ba_top, weights = ~n, vcov = "cluster")

## NOTE: 3 fixed-effect singletons were removed (3 observations).

summary(fe_rel_top)

## OLS estimation, Dep. Var.: mean_rating
## Observations: 118,070
## Weights: n
## Fixed-effects: app_id: 497
## Standard-errors: Clustered (app_id) 
##              Estimate Std. Error t value  Pr(>|t|)    
## mean_centile  -0.0126    0.00135   -9.32 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.024652     Adj. R2: 0.231137
##                  Within R2: 0.021364

summary(fe_abs_top)

## OLS estimation, Dep. Var.: mean_rating
## Observations: 118,070
## Weights: n
## Fixed-effects: app_id: 497
## Standard-errors: Clustered (app_id) 
##                   Estimate Std. Error t value  Pr(>|t|)    
## scale(mean_order) -0.00494   0.000444   -11.1 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.024663     Adj. R2: 0.230426
##                  Within R2: 0.020459

summary(fe_age_top)

## OLS estimation, Dep. Var.: mean_rating
## Observations: 118,070
## Weights: n
## Fixed-effects: app_id: 497
## Standard-errors: Clustered (app_id) 
##                Estimate Std. Error t value   Pr(>|t|)    
## mean_age_years -0.00223   0.000348    -6.4 3.5217e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.024692     Adj. R2: 0.228609
##                  Within R2: 0.018146

# Split by source: sampled
bv_sam <- votes_binned %>% filter(source == "sampled")
ba_sam <- votes_binned_age %>% filter(source == "sampled")

fe_rel_sam <- feols(mean_rating ~ mean_centile | app_id, data = bv_sam, weights = ~n, vcov = "cluster")
fe_abs_sam <- feols(mean_rating ~ scale(mean_order) | app_id, data = bv_sam, weights = ~n, vcov = "cluster")
fe_age_sam <- feols(mean_rating ~ mean_age_years | app_id, data = ba_sam, weights = ~n, vcov = "cluster")

summary(fe_rel_sam)

## OLS estimation, Dep. Var.: mean_rating
## Observations: 42,584
## Weights: n
## Fixed-effects: app_id: 500
## Standard-errors: Clustered (app_id) 
##              Estimate Std. Error t value Pr(>|t|) 
## mean_centile -0.00772     0.0101  -0.764   0.4451 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.078235     Adj. R2: 0.741815
##                  Within R2: 8.097e-4

summary(fe_abs_sam)

## OLS estimation, Dep. Var.: mean_rating
## Observations: 42,584
## Weights: n
## Fixed-effects: app_id: 500
## Standard-errors: Clustered (app_id) 
##                   Estimate Std. Error t value Pr(>|t|) 
## scale(mean_order)  0.00216    0.00501   0.431  0.66677 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.078252     Adj. R2: 0.741705
##                  Within R2: 3.805e-4

summary(fe_age_sam)

## OLS estimation, Dep. Var.: mean_rating
## Observations: 42,584
## Weights: n
## Fixed-effects: app_id: 500
## Standard-errors: Clustered (app_id) 
##                 Estimate Std. Error t value Pr(>|t|) 
## mean_age_years -0.000737    0.00121   -0.61  0.54247 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.078253     Adj. R2: 0.741699
##                  Within R2: 3.579e-4

Summary table

library(modelsummary)

models <- list(
  "Top: Centile" = fe_rel_top,
  "Top: Count" = fe_abs_top,
  "Top: Age" = fe_age_top,
  "Sampled: Centile" = fe_rel_sam,
  "Sampled: Count" = fe_abs_sam,
  "Sampled: Age" = fe_age_sam
)

modelsummary(
  models,
  stars = c('*' = .05, '**' = .01, '***' = .001),
  gof_map = c("nobs", "r2.within", "FE: app_id"),
  title = "Fixed effects models of within-game rating trajectories"
)

Fixed effects models of within-game rating trajectories
	Top: Centile	Top: Count	Top: Age	Sampled: Centile	Sampled: Count	Sampled: Age
* p < 0.05, p < 0.01, * p < 0.001
mean_centile	-0.013***			-0.008
	(0.001)			(0.010)
scale(mean_order)		-0.005***			0.002
		(0.000)			(0.005)
mean_age_years			-0.002***			-0.001
			(0.000)			(0.001)
Num.Obs.	118070	118070	118070	42584	42584	42584
R2 Within	0.021	0.020	0.018	0.001	0.000	0.000
FE: app_id	X	X	X	X	X	X

modelsummary(
  models,
  stars = c('*' = .05, '**' = .01, '***' = .001),
  gof_map = c("nobs", "r2.within", "FE: app_id"),
  output = "figs/fe_models_table.png",
  title = "Fixed effects models of within-game rating trajectories"
)

Per-game slope diagnostics

Why is the top-rated slope significant but the sampled one isn’t, despite the sampled plot showing a larger visual trend? Within-game noise is much higher for sampled games.

game_slopes <- votes_binned %>%
  group_by(app_id, source) %>%
  filter(n() >= 5) %>%
  summarize(
    slope = coef(lm(mean_rating ~ mean_order, weights = n))[2],
    n_bins = n(),
    sd_rating = sd(mean_rating),
    .groups = "drop"
  )

game_slopes %>%
  group_by(source) %>%
  summarize(
    n_games = n(),
    mean_slope = mean(slope),
    sd_slope = sd(slope),
    median_slope = median(slope),
    mean_within_sd = mean(sd_rating),
    mean_bins = mean(n_bins),
    .groups = "drop"
  )

Nonlinear (spline) models

library(splines)

fe_rel_ns <- feols(mean_rating ~ ns(mean_centile, 4) | app_id, data = votes_binned, weights = ~n, vcov = "cluster")

## NOTE: 3 fixed-effect singletons were removed (3 observations).

fe_abs_ns <- feols(mean_rating ~ ns(mean_order, 4) | app_id, data = votes_binned, weights = ~n, vcov = "cluster")

## NOTE: 3 fixed-effect singletons were removed (3 observations).

fe_age_ns <- feols(mean_rating ~ ns(mean_age_years, 4) | app_id, data = votes_binned_age, weights = ~n, vcov = "cluster")

## NOTE: 3 fixed-effect singletons were removed (3 observations).

summary(fe_rel_ns)

## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id) 
##                      Estimate Std. Error t value   Pr(>|t|)    
## ns(mean_centile, 4)1  0.00470    0.00317   1.482 1.3864e-01    
## ns(mean_centile, 4)2 -0.00184    0.00337  -0.546 5.8530e-01    
## ns(mean_centile, 4)3 -0.00738    0.00536  -1.377 1.6887e-01    
## ns(mean_centile, 4)4 -0.01465    0.00327  -4.481 8.2784e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045333     Adj. R2: 0.811147
##                  Within R2: 0.009858

summary(fe_abs_ns)

## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id) 
##                     Estimate Std. Error t value   Pr(>|t|)    
## ns(mean_order, 4)1  0.000725    0.00333   0.217 8.2795e-01    
## ns(mean_order, 4)2 -0.023596    0.00658  -3.586 3.5167e-04 ***
## ns(mean_order, 4)3 -0.044393    0.00600  -7.400 2.8812e-13 ***
## ns(mean_order, 4)4 -0.048554    0.00230 -21.116  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.045457     Adj. R2: 0.810109
##                  Within R2: 0.004415

summary(fe_age_ns)

## OLS estimation, Dep. Var.: mean_rating
## Observations: 160,654
## Weights: n
## Fixed-effects: app_id: 997
## Standard-errors: Clustered (app_id) 
##                        Estimate Std. Error t value  Pr(>|t|)    
## ns(mean_age_years, 4)1 -0.00957    0.00324   -2.95 0.0032567 ** 
## ns(mean_age_years, 4)2 -0.00928    0.00666   -1.39 0.1641186    
## ns(mean_age_years, 4)3 -0.01375    0.00860   -1.60 0.1100888    
## ns(mean_age_years, 4)4 -0.02328    0.01083   -2.15 0.0318862 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.04543     Adj. R2: 0.810338
##                 Within R2: 0.005618

Cross-sectional correlations

# Pearson (log-transformed count) and Spearman correlations by source
game_summary %>%
  left_join(target_games %>% select(app_id, total_reviews), by = "app_id") %>%
  group_by(source) %>%
  summarize(
    pearson = cor(log10(total_reviews), mean_rating, method = "pearson"),
    spearman = cor(total_reviews, mean_rating, method = "spearman"),
    n = n(),
    .groups = "drop"
  )

Predicting long-term rating from early reviews

Do early reviews predict the long-term rating? Or is there a recency bias that inflates initial scores?

# Compute 1-month and 5-year ratings for games with 5+ years of data
predict_data <- votes_ordered %>%
  filter(app_id %in% games_5yr) %>%
  group_by(app_id) %>%
  mutate(days_since_first = (timestamp_created - min(timestamp_created)) / 86400) %>%
  ungroup()

rating_1mo <- predict_data %>%
  filter(days_since_first <= 30) %>%
  group_by(app_id, source) %>%
  summarize(rating_1mo = mean(voted_up), n_1mo = n(), .groups = "drop")

rating_5yr <- predict_data %>%
  filter(days_since_first >= 4 * 365, days_since_first <= 5 * 365) %>%
  group_by(app_id, source) %>%
  summarize(rating_5yr = mean(voted_up), n_5yr = n(), .groups = "drop")

predict_df <- inner_join(rating_1mo, rating_5yr, by = c("app_id", "source")) %>%
  filter(n_1mo >= 10, n_5yr >= 10)

fit_predict <- lm(rating_5yr ~ rating_1mo * source, data = predict_df)
summary(fit_predict)

## 
## Call:
## lm(formula = rating_5yr ~ rating_1mo * source, data = predict_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5226 -0.0168  0.0077  0.0590  0.2681 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  0.2945     0.0388    7.59  1.8e-13 ***
## rating_1mo                   0.5872     0.0453   12.96  < 2e-16 ***
## sourcetop_rated              0.5888     0.4946    1.19     0.23    
## rating_1mo:sourcetop_rated  -0.5016     0.5067   -0.99     0.32    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.106 on 457 degrees of freedom
## Multiple R-squared:  0.496,  Adjusted R-squared:  0.493 
## F-statistic:  150 on 3 and 457 DF,  p-value: <2e-16

predict_df %>%
  ggplot(aes(rating_1mo, rating_5yr, color = source)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", aes(group = 1), color = "blue") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", alpha = 0.5) +
  scale_x_continuous(labels = scales::percent_format()) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Rating after 1 month",
    y = "Rating after 5 years",
    title = "Predicting long-term rating from early reviews",
    subtitle = glue("R² = {round(summary(fit_predict)$r.squared, 2)}, n = {nrow(predict_df)} games with 5+ years of data. Dashed = identity line.")
  )

## [37m`geom_smooth()` using formula = 'y ~ x'[39m

The slope of ~0.59 (not 1.0) indicates regression to the mean. Since the FE models show no meaningful within-game rating change over time, this is pure statistical regression: early ratings are noisy estimates that converge to the true rating as more reviews accumulate. Unlike IMDB, there is no recency bias — early Steam reviews are noisy but unbiased.

Steam review history: are recent releases over/under rated?

13 March, 2026