11
# =============================================================
# TWITCH RECOMMENDER SYSTEM - ANALYSIS SCRIPT
# Run section by section in RStudio (Ctrl/Cmd + Enter)
# =============================================================
# ---- Packages ----
# Run once if not installed:
# install.packages(
#   c("tidyverse", "knitr", "kableExtra", "scales", "RColorBrewer")
# )
library(tidyverse)
library(ggplot2)
library(knitr)
library(kableExtra)
library(scales)
library(RColorBrewer)
# =============================================================
# 1. INTRODUCTION - WHY TWITCH, WHAT MAKES IT UNIQUE
# =============================================================
# Headline platform metrics shown in the introduction. Both columns are
# hard-coded character strings; nothing here is computed.
twitch_facts <- tibble(
  Metric = c(
    "Monthly active users",
    "Avg. concurrent viewers",
    "Active streamers / month",
    "Hours watched / month (B)",
    "Avg. session length (min)"
  ),
  Value = c("140M+", "2.5M", "8M", "1.6", "95")
)
print(twitch_facts)
# Hours watched per content category, in millions (hard-coded figures).
content_mix <- tibble(
  Category = c(
    "Just Chatting", "League of Legends", "GTA V", "Valorant",
    "Minecraft", "Fortnite", "Music", "Other"
  ),
  Hours_M = c(420, 180, 150, 130, 110, 95, 70, 380)
)

# Horizontal bar chart with categories ordered by hours watched.
ggplot(
  content_mix,
  aes(reorder(Category, Hours_M), Hours_M, fill = Category)
) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Hours Watched by Top Twitch Categories",
    x = NULL,
    y = "Hours watched (millions)"
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")
# =============================================================
# 2. SCENARIO DESIGN - USER PERSPECTIVE
# =============================================================
# One row per user scenario: what the user wants, which signals the
# recommender would use, and the metric that indicates success.
user_scenarios <- tibble(
  Scenario = c(
    "Returning fan",
    "Bored browser",
    "Game-focused viewer",
    "New user onboarding",
    "Mobile, short session"
  ),
  User_Goal = c(
    "Catch favorite streamer live",
    "Find anything entertaining",
    "Watch a specific game",
    "Discover the platform",
    "Quick 10-min watch"
  ),
  Signal_Used = c(
    "Follow list, watch history",
    "Watch history, time-of-day",
    "Category page, language",
    "Sign-up survey, popularity",
    "Device type, short clips"
  ),
  Success_Metric = c(
    "CTR to followed channel",
    "Watch time > 5 min",
    "Time on category page",
    "Day-7 retention",
    "Clip completion rate"
  )
)
print(user_scenarios)
# Simulated daily engagement for two user types over 30 days.
# The seed is fixed so the noise (and therefore the plot) is reproducible.
set.seed(42)
days <- 1:30
journey <- tibble(
  day = rep(days, 2),
  engagement = c(
    # New user: exponential decay from 100 with N(0, 3) noise.
    100 * exp(-0.08 * days) + rnorm(length(days), 0, 3),
    # Returning fan: oscillation around 60 with N(0, 4) noise.
    60 + 20 * sin(days / 3) + rnorm(length(days), 0, 4)
  ),
  user_type = rep(c("New user", "Returning fan"), each = length(days))
)

# `linewidth` replaces the `size` argument for lines, which is deprecated
# since ggplot2 3.4.0. Colours are named so the mapping does not depend on
# factor-level ordering.
ggplot(journey, aes(day, engagement, color = user_type)) +
  geom_line(linewidth = 1.1) +
  geom_point() +
  labs(
    title = "Simulated Engagement Patterns by User Type",
    x = "Day",
    y = "Daily watch minutes",
    color = "User type"
  ) +
  theme_minimal() +
  scale_color_manual(
    values = c("New user" = "#9146FF", "Returning fan" = "#00B5AD")
  )
# =============================================================
# 3. SCENARIO DESIGN - BUSINESS PERSPECTIVE
# =============================================================
# One row per business objective: the KPI that tracks it and the
# recommender lever that would move it.
biz_objectives <- tibble(
  Objective = c(
    "Maximize watch time",
    "Grow subs/Bits",
    "Support emerging streamers",
    "Reduce churn",
    "Brand safety"
  ),
  KPI = c(
    "Avg. minutes/session",
    "Conversion to paid",
    "% views to <1k follower channels",
    "Day-30 retention",
    "% flagged streams surfaced"
  ),
  Lever = c(
    "Personalized 'Live now' rail",
    "Promote sub-only perks",
    "Diversity boost in feed",
    "Re-engagement notifications",
    "Filter low-trust signals"
  )
)
print(biz_objectives)
# Hard-coded scores for each recommender strategy on two axes.
tradeoff <- tibble(
  Strategy = c(
    "Pure popularity",
    "Pure personalization",
    "Diversity-aware",
    "Exploration boost",
    "Hybrid (current)"
  ),
  Watch_Time = c(0.85, 0.78, 0.72, 0.65, 0.80),
  Ecosystem_Health = c(0.30, 0.50, 0.85, 0.90, 0.75)
)

# Labelled scatter plot; axis limits are fixed so the labels drawn above
# each point stay inside the panel.
ggplot(tradeoff, aes(Watch_Time, Ecosystem_Health, label = Strategy)) +
  geom_point(size = 5, color = "#9146FF") +
  geom_text(vjust = -1.2, size = 3.8) +
  xlim(0.5, 0.95) +
  ylim(0.2, 1.0) +
  labs(
    title = "Recommender Strategy Tradeoffs",
    x = "Short-term watch time",
    y = "Long-term ecosystem health"
  ) +
  theme_minimal()
# =============================================================
# 4. REVERSE ENGINEERING - SIGNALS, ALGORITHM, LIVE CHALLENGE
# =============================================================
# NOTE: written without $ access in the scoring function to make
# this script robust to copy-paste through PDF/Word editors that
# sometimes strip the dollar sign.
set.seed(123)

# Toy streamer catalogue: one row per streamer with the attributes the
# scoring function reads (category, language, avg_viewers, is_live).
streamers <- tibble(
  streamer_id = paste0("S", 1:8),
  name = c(
    "xQc", "Pokimane", "shroud", "Ninja",
    "Asmongold", "Valkyrae", "summit1g", "TommyInnit"
  ),
  category = c(
    "Just Chatting", "Just Chatting", "Valorant", "Fortnite",
    "WoW", "GTA V", "GTA V", "Minecraft"
  ),
  language = c("EN", "EN", "EN", "EN", "EN", "EN", "EN", "EN"),
  avg_viewers = c(70000, 25000, 30000, 20000, 60000, 18000, 22000, 40000),
  is_live = c(TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE)
)
# Toy user table: one row per user with a favourite category and language.
users <- tibble(
  user_id = paste0("U", 1:5),
  fav_category = c(
    "Just Chatting", "Valorant", "Minecraft", "GTA V", "Just Chatting"
  ),
  fav_language = rep("EN", 5)
)
# User x streamer watch matrix: one row per user, one column per streamer
# (filled row by row). Values are presumably minutes watched - the unit is
# not stated anywhere in this script.
watch_matrix <- matrix(
  c(
    120, 60,   0,  10,  80,   0,   5,   0,
      0,  5, 200,   0,   0,   0,  20,   0,
     10,  0,   0,  30,   5,   0,   0, 240,
      0, 15,  10,   0,   0, 180, 150,   0,
     90, 70,   0,   0, 110,   0,   0,  20
  ),
  nrow = 5,
  byrow = TRUE
)
# Label rows/columns with ids so the matrix can be indexed by user id.
rownames(watch_matrix) <- pull(users, user_id)
colnames(watch_matrix) <- pull(streamers, streamer_id)
print(watch_matrix)
# Hybrid scoring function (uses pull() instead of $ to avoid copy issues).
#
# Scores every row of the global `streamers` table for one user by blending
# three signals:
#   cf         - the user's row of `watch_matrix` divided by its maximum
#                (or by 1 when the maximum is below 1)
#   content    - category match (1.0) plus language match (0.3), divided by
#                1.3 and capped at 1
#   popularity - avg_viewers relative to the largest avg_viewers, multiplied
#                by 0.2 for streamers that are not live
#
# Args:
#   uid       Single character user id; must appear in `users` and be a row
#             name of `watch_matrix`.
#   w_cf      Weight of the collaborative-filtering signal.
#   w_content Weight of the content-match signal.
#   w_pop     Weight of the popularity / live signal.
#
# Returns:
#   `streamers` with added columns cf, content, popularity (rounded to 2
#   decimals) and final_score (rounded to 3 decimals), sorted by final_score
#   in descending order.
score_streamers <- function(uid, w_cf = 0.5, w_content = 0.3, w_pop = 0.2) {
  stopifnot(is.character(uid), length(uid) == 1L)

  user_row <- filter(users, user_id == uid)
  # Fail with a clear message instead of a "subscript out of bounds" error.
  if (nrow(user_row) != 1L || !(uid %in% rownames(watch_matrix))) {
    stop("Unknown user id: ", uid, call. = FALSE)
  }
  fav_cat <- pull(user_row, fav_category)
  fav_lang <- pull(user_row, fav_language)

  s_category <- pull(streamers, category)
  s_language <- pull(streamers, language)
  s_viewers <- pull(streamers, avg_viewers)
  s_is_live <- pull(streamers, is_live)

  # Collaborative-filtering proxy
  cf <- watch_matrix[uid, ]
  cf <- cf / max(cf, 1) # max(cf, 1) avoids dividing by zero for an all-zero row

  # Content match
  content <- (s_category == fav_cat) * 1.0 + (s_language == fav_lang) * 0.3
  content <- pmin(content / 1.3, 1)

  # Popularity / live boost
  pop <- s_viewers / max(s_viewers)
  pop <- pop * ifelse(s_is_live, 1, 0.2)

  # Blend BEFORE rounding. Inside mutate() the freshly created (rounded) `cf`
  # and `content` columns would mask these vectors, so computing the score
  # there would mix rounded and unrounded inputs.
  raw_score <- w_cf * cf + w_content * content + w_pop * pop

  streamers %>%
    mutate(
      cf = round(cf, 2),
      content = round(content, 2),
      popularity = round(pop, 2),
      final_score = round(raw_score, 3)
    ) %>%
    arrange(desc(final_score))
}
# Ranked recommendations for user U1 using the default weights.
recs_u1 <- score_streamers("U1")
print(recs_u1)

# Horizontal bar chart of the hybrid score, coloured by live status.
# is_live is logical, so the manual scale is keyed by "TRUE"/"FALSE".
ggplot(
  recs_u1,
  aes(reorder(name, final_score), final_score, fill = is_live)
) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(
    values = c("TRUE" = "#9146FF", "FALSE" = "grey70"),
    labels = c("TRUE" = "Live now", "FALSE" = "Offline")
  ) +
  labs(
    title = "Final Recommendation Scores for User U1",
    x = NULL,
    y = "Hybrid score",
    fill = NULL
  ) +
  theme_minimal()
# =============================================================
# 5. COMPARISON WITH AMAZON / NYT
# =============================================================
# Hard-coded 1-5 intensity scores in long format: five dimensions for each
# of three platforms. Score values are listed platform by platform, in the
# same order as `Platform`.
comp_scores <- tibble(
  Dimension = rep(
    c(
      "Item churn",
      "Personalization weight",
      "Cold-start severity",
      "Real-time pressure",
      "Diversity need"
    ),
    3
  ),
  Platform = rep(c("Twitch", "Amazon", "NYT"), each = 5),
  Score = c(
    5, 4, 5, 5, 4, # Twitch
    2, 5, 3, 2, 3, # Amazon
    5, 3, 5, 4, 5  # NYT
  )
)

# Grouped bar chart; x labels are tilted so the long names do not overlap.
ggplot(comp_scores, aes(Dimension, Score, fill = Platform)) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c("Twitch" = "#9146FF", "Amazon" = "#FF9900", "NYT" = "#326891")
  ) +
  labs(
    title = "Recommender Pressure Profile by Platform",
    y = "Intensity (1-5)",
    x = NULL
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 20, hjust = 1))
# =============================================================
# 6. RECOMMENDATIONS FOR IMPROVEMENT
# =============================================================
# Proposed improvements with a hard-coded relative impact estimate.
improvements <- tibble(
  N = 1:4,
  Recommendation = c(
    "Better explanation of recs",
    "Diversity boost for emerging streamers",
    "Smarter mid-stream re-categorization",
    "Cross-device session continuity"
  ),
  Expected_Impact = c(4, 9, 6, 3)
)

# Bars are ordered by recommendation number (N), not by impact.
ggplot(
  improvements,
  aes(reorder(Recommendation, N), Expected_Impact, fill = Recommendation)
) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Estimated Relative Impact of Recommendations",
    x = NULL,
    y = "Relative impact"
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")
# =============================================================
# 7. CONCLUSION
# =============================================================
# Print the closing summary; cat() joins the pieces with single spaces.
cat(
  "Twitch's recommender must blend collaborative, content, and",
  "live-popularity signals.",
  "It differs most from Amazon and NYT on item lifetime and",
  "real-time pressure.",
  "Key improvement areas: explainability, diversity, mid-stream",
  "re-tagging, cross-device."
)