Objective: Analyze whether certain genre pairings achieve higher ratings than individual genres alone.
Analysis Approach
- Identify best and worst genre combinations in the dataset
- Calculate average ratings for genre pairs
- Compare combination ratings against single-genre baselines
Top 10 Genre Combinations
# Keep only rated movies: rows whose `rating` is NA are dropped (no other
# column is checked for missing values). `genre_count` is the number of genre
# flags set on each movie.
# NOTE(review): assumes the seven genre columns are 0/1 indicators -- confirm
# against the `movies` data source.
genre_combos <- movies %>%
  filter(!is.na(rating)) %>%
  mutate(
    genre_count = Action + Animation + Comedy + Drama +
      Documentary + Romance + Short
  )

# Add one 0/1 indicator column per 2-genre pairing (21 in total), named
# "<first>_<second>" in lower case, e.g. `action_animation`. The pairs are
# generated with combn() instead of being typed out by hand, so a genre can be
# added or renamed in one place. combn() walks the vector in order, which
# yields action_animation, action_comedy, ..., romance_short.
genre_names <- c(
  "Action", "Animation", "Comedy", "Drama", "Documentary", "Romance", "Short"
)
genre_pairs <- combn(genre_names, 2, simplify = FALSE)
for (pair in genre_pairs) {
  flag_name <- tolower(paste(pair, collapse = "_"))
  # 1 when the movie carries both genres, 0 otherwise.
  genre_combos[[flag_name]] <- ifelse(
    genre_combos[[pair[1]]] == 1 & genre_combos[[pair[2]]] == 1, 1, 0
  )
}
# Mean rating for every 2-genre pairing, followed by two baseline rows:
# movies with exactly one genre flag and movies with more than one.
pair_labels <- c(
  "Action_Animation", "Action_Comedy", "Action_Drama", "Action_Documentary",
  "Action_Romance", "Action_Short",
  "Animation_Comedy", "Animation_Drama", "Animation_Documentary",
  "Animation_Romance", "Animation_Short",
  "Comedy_Drama", "Comedy_Documentary", "Comedy_Romance", "Comedy_Short",
  "Drama_Documentary", "Drama_Romance", "Drama_Short",
  "Documentary_Romance", "Documentary_Short",
  "Romance_Short"
)

# Average rating over the rows selected by the logical mask `keep`.
mean_rating_where <- function(keep) {
  mean(genre_combos$rating[keep], na.rm = TRUE)
}

# Each label maps to its indicator column in `genre_combos` by lower-casing.
pair_means <- vapply(
  tolower(pair_labels),
  function(flag) mean_rating_where(genre_combos[[flag]] == 1),
  numeric(1),
  USE.NAMES = FALSE
)

combo_ratings <- data.frame(
  Combination = c(pair_labels, "Single_Genre", "Multiple_Genres"),
  Average_Rating = c(
    pair_means,
    mean_rating_where(genre_combos$genre_count == 1),
    mean_rating_where(genre_combos$genre_count > 1)
  )
)
# Rank all rows of `combo_ratings` from highest to lowest average rating.
combo_ratings_sorted <- combo_ratings[order(-combo_ratings$Average_Rating), ]

# A pairing with no movies has a NaN mean, which order() places last; left in,
# it would be reported as one of the "worst" combinations. Exclude such rows
# from the top/bottom tables (the full sorted table above is left untouched).
ranked_combinations <- combo_ratings_sorted[
  !is.na(combo_ratings_sorted$Average_Rating),
]
top_10_combinations <- head(ranked_combinations, 10)
bottom_10_combinations <- tail(ranked_combinations, 10)

# Print both tables in one call; capture.output() turns each printed data
# frame into lines of text so cat() can emit them one per line.
cat("TOP 10 GENRE COMBINATIONS BY AVERAGE RATING:\n",
    capture.output(print(top_10_combinations, row.names = FALSE)),
    "\nBOTTOM 10 GENRE COMBINATIONS BY AVERAGE RATING:\n",
    capture.output(print(bottom_10_combinations, row.names = FALSE)),
    sep = "\n")
TOP 10 GENRE COMBINATIONS BY AVERAGE RATING:
Combination Average_Rating
Animation_Romance 7.092500
Animation_Drama 6.928148
Animation_Documentary 6.897674
Action_Documentary 6.800000
Documentary_Romance 6.770000
Animation_Short 6.637548
Animation_Comedy 6.618081
Comedy_Short 6.612835
Drama_Short 6.569791
Drama_Documentary 6.559055
BOTTOM 10 GENRE COMBINATIONS BY AVERAGE RATING:
Combination Average_Rating
Documentary_Short 6.345444
Drama_Romance 6.292737
Action_Animation 6.285714
Multiple_Genres 6.255300
Comedy_Drama 6.222814
Comedy_Romance 6.096310
Action_Romance 6.005776
Single_Genre 5.971873
Action_Drama 5.640523
Action_Comedy 5.567526
# Number of movies in `genre_combos` behind one `Combination` label.
# "Single_Genre" / "Multiple_Genres" are counted from `genre_count`; any other
# label is lower-cased to find its 0/1 indicator column. NAs are ignored in
# all three cases, matching how the average ratings treat them.
count_movies_in_combo <- function(combo) {
  if (combo == "Single_Genre") {
    sum(genre_combos$genre_count == 1, na.rm = TRUE)
  } else if (combo == "Multiple_Genres") {
    sum(genre_combos$genre_count > 1, na.rm = TRUE)
  } else {
    sum(genre_combos[[tolower(combo)]] == 1, na.rm = TRUE)
  }
}

# Build one "<label>: <n> movies" line per combination. vapply() over the
# labels avoids both the 1:nrow() pitfall on an empty table and growing a
# vector inside a loop; sprintf() returns character(0) for empty input.
format_combo_counts <- function(combos) {
  combos <- as.character(combos)
  counts <- vapply(combos, count_movies_in_combo, integer(1), USE.NAMES = FALSE)
  sprintf("%s: %d movies", combos, counts)
}

output_lines <- c(
  "NUMBER OF MOVIES IN TOP COMBINATIONS:\n",
  format_combo_counts(top_10_combinations$Combination),
  "\nNUMBER OF MOVIES IN BOTTOM COMBINATIONS:\n",
  format_combo_counts(bottom_10_combinations$Combination)
)

# Print everything at once
cat(output_lines, sep = "\n")
NUMBER OF MOVIES IN TOP COMBINATIONS:
Animation_Romance: 40 movies
Animation_Drama: 135 movies
Animation_Documentary: 43 movies
Action_Documentary: 16 movies
Documentary_Romance: 10 movies
Animation_Short: 3116 movies
Animation_Comedy: 2251 movies
Comedy_Short: 3880 movies
Drama_Short: 1099 movies
Drama_Documentary: 127 movies
NUMBER OF MOVIES IN BOTTOM COMBINATIONS:
Documentary_Short: 867 movies
Drama_Romance: 2561 movies
Action_Animation: 84 movies
Multiple_Genres: 15537 movies
Comedy_Drama: 3099 movies
Comedy_Romance: 2195 movies
Action_Romance: 277 movies
Single_Genre: 30465 movies
Action_Drama: 1799 movies
Action_Comedy: 776 movies
Bar Plot (Top Combinations)
# One pastel fill per bar, assigned by rank position (first colour goes to the
# highest-rated combination, last to the tenth); colours are not tied to
# specific combination names.
pastels <- c("#FFB3BA", "#FFDFBA", "#FFFFBA", "#E6FFCC", "#BAFFC9",
             "#C9E7FF", "#BAE1FF", "#E2BAFF", "#EEE5FF", "#D3D3D3")

# Enlarge the bottom margin to make room for the rotated labels. par() returns
# the previous settings, which are restored once the plot is finished so later
# plots are not affected.
old_par <- par(mar = c(6, 4, 4, 2))

# Tallest bar; used to scale the y-axis headroom and the label offsets.
top_max <- max(top_10_combinations$Average_Rating)

# Bar plot (default x-axis suppressed; custom labels are drawn below).
# `bp` holds the x midpoints of the bars.
bp <- barplot(top_10_combinations$Average_Rating,
              col = pastels,
              main = "Top 10 Genre Combinations by Average Rating",
              ylab = "Average Rating (1-10)",
              ylim = c(0, top_max * 1.15),
              xaxt = "n")

# X-axis labels rotated 45 degrees, placed just below the bottom edge of the
# plot region (par("usr")[3]); xpd = TRUE allows drawing in the margin.
text(x = bp,
     y = par("usr")[3] - (top_max * 0.05),
     labels = top_10_combinations$Combination,
     srt = 45,
     adj = 1,
     xpd = TRUE,
     cex = 0.7)

# Add x-axis title
mtext("Genre Combination", side = 1, line = 5, cex = 1)

# Add the rounded average above each bar
text(x = bp,
     y = top_10_combinations$Average_Rating + (top_max * 0.02),
     labels = round(top_10_combinations$Average_Rating, 2),
     pos = 3,
     cex = 0.8,
     xpd = TRUE)

# Restore the graphics parameters that were in effect before this plot.
par(old_par)

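Graph Analysis: Animation-based genre combinations dominate the highest-rated pairings, with Animation-Romance leading at an average rating of 7.09/10. Documentary and Drama combinations also performed well, particularly when paired with Animation or Romance. This suggests that blending artistic/visual genres (Animation) with emotionally driven genres (Romance, Drama) creates a powerful combination that resonates strongly with audiences. Note, however, that several of the top pairings rest on small samples (e.g., Documentary-Romance has only 10 movies and Action-Documentary only 16), so their averages are less reliable than those of the larger groups.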
Bar Plot (Bottom Combinations)
# One pastel fill per bar, assigned by position within the bottom-10 table
# (first colour goes to its highest-rated row, last to the lowest); colours
# are not tied to specific combination names.
pastels <- c("#FFB3BA", "#FFDFBA", "#FFFFBA", "#E6FFCC", "#BAFFC9",
             "#C9E7FF", "#BAE1FF", "#E2BAFF", "#EEE5FF", "#D3D3D3")

# Enlarge the bottom margin to make room for the rotated labels. par() returns
# the previous settings, which are restored once the plot is finished so later
# plots are not affected.
old_par <- par(mar = c(6.5, 4, 4, 2))

# Tallest bar; used to scale the y-axis headroom and the label offsets.
bottom_max <- max(bottom_10_combinations$Average_Rating)

# Bar plot (default x-axis suppressed; custom labels are drawn below).
# `bp` holds the x midpoints of the bars.
bp <- barplot(bottom_10_combinations$Average_Rating,
              col = pastels,
              main = "Bottom 10 Genre Combinations by Average Rating",
              ylab = "Average Rating (1-10)",
              ylim = c(0, bottom_max * 1.15),
              xaxt = "n")

# X-axis labels rotated 45 degrees, placed just below the bottom edge of the
# plot region (par("usr")[3]); xpd = TRUE allows drawing in the margin.
text(x = bp,
     y = par("usr")[3] - (bottom_max * 0.05),
     labels = bottom_10_combinations$Combination,
     srt = 45,
     adj = 1,
     xpd = TRUE,
     cex = 0.7)

# Add x-axis title
mtext("Genre Combination", side = 1, line = 5, cex = 1)

# Add the rounded average above each bar
text(x = bp,
     y = bottom_10_combinations$Average_Rating + (bottom_max * 0.02),
     labels = round(bottom_10_combinations$Average_Rating, 2),
     pos = 3,
     cex = 0.8,
     xpd = TRUE)

# Restore the graphics parameters that were in effect before this plot.
par(old_par)

Graph Analysis: Action-based combinations consistently underperform, with Action-Drama and Action-Comedy being the two lowest-rated pairings. Surprisingly, the Single-Genre and overall Multiple-Genres baselines also rank in the bottom tier, indicating that pure-genre films struggle to achieve high ratings compared to specific, well-matched genre pairs.