library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
# Load the movie dataset used by every chart in this report.
movies <- utils::read.csv(file = "final_movies_tableau.csv")
Introduction
Studios attempt to control movie success through budget, release timing, and genre selection. However, audiences ultimately determine success through engagement and global demand. This analysis explores which factors truly drive blockbuster performance.
# Distribution of worldwide gross: a 40-bin histogram with dollar-formatted
# tick labels on the x-axis.
movies %>%
  ggplot(mapping = aes(x = worldwide_gross)) +
  geom_histogram(fill = "steelblue", alpha = 0.8, bins = 40) +
  scale_x_continuous(labels = scales::dollar) +
  ggtitle("Most Movies Don’t Become Blockbusters") +
  xlab("Worldwide Gross (USD)") +
  ylab("Number of Movies") +
  theme_minimal()
# Interactive scatter of production budget against worldwide gross, with a
# single linear trend line and a per-movie hover tooltip.
#
# `text` is a character string built for each row and consumed only by
# ggplotly() as the tooltip. Because it is a discrete variable mapped in the
# plot-level aes(), ggplot2 would otherwise fold it into the default grouping,
# giving geom_smooth() one tiny group per distinct tooltip string and
# therefore nothing to fit. `group = 1` on the smoother overrides that so one
# regression line is fitted across all movies.
p <- ggplot(movies, aes(
  x = production_budget,
  y = worldwide_gross,
  text = paste(
    "Movie:", movie,
    "<br>Year:", year,
    "<br>Genre:", primary_genre,
    "<br>Budget:", dollar(production_budget),
    "<br>Revenue:", dollar(worldwide_gross)
  )
)) +
  geom_point(alpha = 0.6) +
  geom_smooth(aes(group = 1), method = "lm", color = "red") +
  labs(
    title = "Do Bigger Budgets Actually Lead to Higher Revenue?",
    x = "Production Budget (USD)",
    y = "Worldwide Gross (USD)"
  ) +
  scale_x_continuous(labels = dollar) +
  scale_y_continuous(labels = dollar) +
  theme_minimal()

# Show only the custom `text` string on hover, not plotly's default x/y dump.
ggplotly(p, tooltip = "text")
## `geom_smooth()` using formula = 'y ~ x'
# Scatter of production budget against profit margin.
#
# Rows whose profit_margin is NA, NaN or +/-Inf are dropped explicitly before
# plotting. The genre boxplot of this same column reports 101 non-finite
# rows; geom_point() would place infinite values on the panel border and
# discard missing ones, so neither belongs in this chart.
# NOTE(review): `labels = percent` multiplies values by 100, so this assumes
# profit_margin is stored as a fraction (0.25 = 25%) -- confirm against the
# CSV.
movies %>%
  dplyr::filter(is.finite(profit_margin)) %>%
  ggplot(aes(x = production_budget, y = profit_margin)) +
  geom_point(alpha = 0.5) +
  labs(
    title = "Do Expensive Movies Use Money Efficiently?",
    x = "Production Budget (USD)",
    y = "Profit Margin (%)"
  ) +
  scale_x_continuous(labels = dollar) +
  scale_y_continuous(labels = percent) +
  theme_minimal()
# Horizontal boxplots of worldwide gross per genre, with genres ordered by
# their median worldwide gross.
movies %>%
  dplyr::mutate(
    genre_by_gross = reorder(primary_genre, worldwide_gross, FUN = median)
  ) %>%
  ggplot(mapping = aes(x = genre_by_gross, y = worldwide_gross)) +
  geom_boxplot(fill = "lightblue") +
  coord_flip() +
  scale_y_continuous(labels = scales::dollar) +
  ggtitle("Which Genres Generate the Most Revenue?") +
  xlab("Genre") +
  ylab("Worldwide Gross (USD)") +
  theme_minimal()
# Horizontal boxplots of profit margin per genre, with genres ordered by
# their median profit margin.
#
# profit_margin contains non-finite values: rendering this chart previously
# warned that stat_boxplot() removed 101 such rows. Those rows still fed the
# per-genre medians used for ordering, so the order could disagree with the
# boxes actually drawn. Filtering first makes the ordering and the boxes use
# the same rows and makes the exclusion explicit instead of a warning.
# NOTE(review): `labels = percent` multiplies values by 100, so this assumes
# profit_margin is stored as a fraction (0.25 = 25%) -- confirm against the
# CSV.
movies %>%
  dplyr::filter(is.finite(profit_margin)) %>%
  dplyr::mutate(
    genre_by_margin = reorder(primary_genre, profit_margin, FUN = median)
  ) %>%
  ggplot(aes(x = genre_by_margin, y = profit_margin)) +
  geom_boxplot(fill = "lightgreen") +
  labs(
    title = "Which Genres Are the Most Profitable?",
    x = "Genre",
    y = "Profit Margin (%)"
  ) +
  scale_y_continuous(labels = percent) +
  coord_flip() +
  theme_minimal()
## Warning: Removed 101 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Bar chart of mean worldwide gross per release month. Missing gross values
# are ignored when averaging; month is shown as a discrete axis.
movies %>%
  summarise(
    avg_gross = mean(worldwide_gross, na.rm = TRUE),
    .by = month
  ) %>%
  ggplot(mapping = aes(x = factor(month), y = avg_gross)) +
  geom_col(fill = "orange") +
  scale_y_continuous(labels = scales::dollar) +
  ggtitle("When Is the Best Time to Release a Movie?") +
  xlab("Month (1 = Jan, 12 = Dec)") +
  ylab("Average Worldwide Gross (USD)") +
  theme_minimal()
# Line chart of mean profit per release year. Missing profit values are
# ignored when averaging.
movies %>%
  summarise(
    avg_profit = mean(profit, na.rm = TRUE),
    .by = year
  ) %>%
  ggplot(mapping = aes(x = year, y = avg_profit)) +
  geom_line(color = "blue") +
  scale_y_continuous(labels = scales::dollar) +
  ggtitle("Are Movies Becoming More Profitable Over Time?") +
  xlab("Year") +
  ylab("Average Profit (USD)") +
  theme_minimal()
# Scatter of vote count against worldwide gross, one semi-transparent point
# per movie, with dollar-formatted y-axis labels.
movies %>%
  ggplot(mapping = aes(x = vote_count, y = worldwide_gross)) +
  geom_point(alpha = 0.5) +
  scale_y_continuous(labels = scales::dollar) +
  ggtitle("Do Popular Movies Earn More Money?") +
  xlab("Vote Count (Number of Votes)") +
  ylab("Worldwide Gross (USD)") +
  theme_minimal()
# Scatter of foreign revenue share against worldwide gross. The x-axis is
# labelled with percent formatting and the y-axis with dollar formatting.
movies %>%
  ggplot(mapping = aes(x = pct_foreign, y = worldwide_gross)) +
  geom_point(alpha = 0.5) +
  scale_y_continuous(labels = scales::dollar) +
  scale_x_continuous(labels = scales::percent) +
  ggtitle("Are International Audiences Driving Movie Success?") +
  xlab("Foreign Revenue Share (%)") +
  ylab("Worldwide Gross (USD)") +
  theme_minimal()
# Bubble chart combining four variables: budget (x), worldwide gross (y),
# genre (point colour) and vote count (point size).
movies %>%
  ggplot(
    mapping = aes(
      x = production_budget,
      y = worldwide_gross,
      size = vote_count,
      color = primary_genre
    )
  ) +
  geom_point(alpha = 0.6) +
  scale_y_continuous(labels = scales::dollar) +
  scale_x_continuous(labels = scales::dollar) +
  ggtitle("What Combination of Factors Creates a Blockbuster?") +
  xlab("Production Budget (USD)") +
  ylab("Worldwide Gross (USD)") +
  # Legend titles for the colour and size scales.
  labs(size = "Vote Count", color = "Genre") +
  theme_minimal()
Conclusion
While studios attempt to control success through strategic decisions like budget, genre, and release timing, these factors alone do not guarantee strong performance. Audience behavior, particularly engagement and international demand, plays a more consistent and powerful role in determining success. Ultimately, blockbuster performance emerges when both studio strategy and audience response align.