##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(knitr)
# Connect to MySQL
con <- dbConnect(
RMariaDB::MariaDB(),
dbname = "imdm_db",
host = "localhost",
user = "root",
password = "Aaliyah131",
port = 3306
)-- Top 10 Directors by Average IMDB Rating
SELECT Director,
ROUND(AVG(IMDB_Rating), 2) AS avg_rating,
COUNT(*) AS num_movies
FROM imdb_top_1000
GROUP BY Director
HAVING COUNT(*) > 2
ORDER BY avg_rating DESC
LIMIT 10;| Director | avg_rating | num_movies |
|---|---|---|
| Christopher Nolan | 8.46 | 8 |
| Peter Jackson | 8.40 | 5 |
| Francis Ford Coppola | 8.40 | 5 |
| Charles Chaplin | 8.33 | 6 |
| Sergio Leone | 8.27 | 6 |
| Stanley Kubrick | 8.23 | 9 |
| Akira Kurosawa | 8.22 | 10 |
| Frank Capra | 8.20 | 4 |
| Quentin Tarantino | 8.18 | 8 |
| Rajkumar Hirani | 8.18 | 4 |
Insight: This query identifies the top 10 directors with the highest average IMDB rating, considering only those who have directed more than 2 movies.
-- Top 10 movies by IMDB rating including genre
SELECT Series_Title, Genre, IMDB_Rating, Released_Year
FROM imdb_top_1000
ORDER BY IMDB_Rating DESC
LIMIT 10;| Series_Title | Genre | IMDB_Rating | Released_Year |
|---|---|---|---|
| The Shawshank Redemption | Drama | 9.3 | 1994 |
| The Godfather | Crime Drama | 9.2 | 1972 |
| The Dark Knight | Action Crime Drama | 9.0 | 2008 |
| The Godfather: Part II | Crime Drama | 9.0 | 1974 |
| 12 Angry Men | Crime Drama | 9.0 | 1957 |
| The Lord of the Rings: The Return of the King | Action Adventure Drama | 8.9 | 2003 |
| Pulp Fiction | Crime Drama | 8.9 | 1994 |
| Schindler’s List | Biography Drama History | 8.9 | 1993 |
| Inception | Action Adventure Sci-Fi | 8.8 | 2010 |
| Fight Club | Drama | 8.8 | 1999 |
Insight: * The top 10 movies by IMDB rating reveal that Crime, Drama, and Action genres consistently dominate audience and critic preferences. * High-rated films in these categories—like The Godfather, The Dark Knight, and Pulp Fiction—demonstrate enduring popularity and strong engagement potential. * For streaming platforms or content acquisition teams, this indicates that investing in or promoting movies within these genres is likely to attract viewers and maximize retention. ## Key Findings Summary
-- Year with most Top 1000 Movies
SELECT Released_Year, COUNT(*) AS movie_count
FROM imdb_top_1000
GROUP BY Released_Year
ORDER BY movie_count DESC
LIMIT 1;| Released_Year | movie_count |
|---|---|
| 2014 | 32 |
Insight: This query identifies the year with the most movies in the IMDb Top 1000, revealing trends in high-quality film production over time.
-- Movies with High Meta_score and IMDB Rating
SELECT Series_Title, IMDB_Rating, Meta_score
FROM imdb_top_1000
WHERE Meta_score > 80 AND IMDB_Rating > 8
ORDER BY IMDB_Rating DESC;| Series_Title | IMDB_Rating | Meta_score |
|---|---|---|
| The Godfather | 9.2 | 100 |
| The Dark Knight | 9.0 | 84 |
| The Godfather: Part II | 9.0 | 90 |
| 12 Angry Men | 9.0 | 96 |
| The Lord of the Rings: The Return of the King | 8.9 | 94 |
| Pulp Fiction | 8.9 | 94 |
| Schindler’s List | 8.9 | 94 |
| The Lord of the Rings: The Fellowship of the Ring | 8.8 | 92 |
| Forrest Gump | 8.8 | 82 |
| Il buono il brutto il cattivo | 8.8 | 90 |
Insight: This query identifies movies that have achieved both strong critical acclaim (Meta_score > 80) and high audience approval (IMDB Rating > 8) — the ideal combination for long-term brand and revenue success.
-- Average Runtime by Certificate
SELECT Certificate, ROUND(AVG(CAST(SUBSTRING_INDEX(Runtime,' ',1) AS UNSIGNED)),1) AS avg_runtime
FROM imdb_top_1000
GROUP BY Certificate;| Certificate | avg_runtime |
|---|---|
| A | 128.9 |
| UA | 129.3 |
| U | 124.2 |
| PG-13 | 117.0 |
| R | 122.4 |
| 111.2 | |
| PG | 114.3 |
| G | 119.7 |
| Passed | 106.2 |
| TV-14 | 132.0 |
Insight: This query analyzes how movie runtimes vary across different audience certification categories (such as PG, PG-13, R, etc.), providing valuable insight into content strategy and audience targeting.
-- Actors appearing most frequently
SELECT Star1 AS Actor, COUNT(*) AS appearances
FROM imdb_top_1000
GROUP BY Star1
ORDER BY appearances DESC
LIMIT 5;| Actor | appearances |
|---|---|
| Tom Hanks | 12 |
| Robert De Niro | 11 |
| Al Pacino | 10 |
| Clint Eastwood | 10 |
| Leonardo DiCaprio | 9 |
Insight: This query identifies the top 5 actors who appear most frequently in the IMDb Top 1000 movies, highlighting those with consistent box-office or critical success.
-- Number of Movies Per Genre
SELECT Genre, COUNT(*) AS total_movies
FROM imdb_top_1000
GROUP BY Genre
ORDER BY total_movies DESC;| Genre | total_movies |
|---|---|
| Drama | 85 |
| Drama Romance | 37 |
| Comedy Drama | 35 |
| Comedy Drama Romance | 31 |
| Action Crime Drama | 30 |
| Biography Drama History | 28 |
| Crime Drama Thriller | 28 |
| Crime Drama Mystery | 27 |
| Crime Drama | 26 |
| Animation Adventure Comedy | 24 |
Insight: This query highlights the distribution of movies across different genres within the IMDb Top 1000 dataset, revealing audience and industry preferences over time.
-- Relationship Between Gross and IMDB Rating
SELECT
ROUND(IMDB_Rating,1) AS rating,
ROUND(AVG(Gross),0) AS avg_gross,
COUNT(*) AS movies_count
FROM imdb_top_1000
WHERE Gross IS NOT NULL
GROUP BY rating
ORDER BY rating DESC;| rating | avg_gross | movies_count |
|---|---|---|
| 9.3 | 28341469 | 1 |
| 9.2 | 134966411 | 1 |
| 9.0 | 198839481 | 3 |
| 8.9 | 194224495 | 3 |
| 8.8 | 196300646 | 5 |
| 8.7 | 192668551 | 5 |
| 8.6 | 81588361 | 15 |
| 8.5 | 85289813 | 20 |
| 8.4 | 107871716 | 31 |
| 8.3 | 42180047 | 44 |
Insight: This query explores how movie quality (IMDB Rating) correlates with box office performance (Gross revenue).
-- Total Gross Revenue per Year
SELECT Released_Year, SUM(Gross) AS total_gross
FROM imdb_top_1000
GROUP BY Released_Year
ORDER BY Released_Year
DESC;| Released_Year | total_gross |
|---|---|
| 2020 | 0 |
| 2019 | 2406742688 |
| 2018 | 2607757362 |
| 2017 | 2061312852 |
| 2016 | 2595557425 |
| 2015 | 2462336868 |
| 2014 | 2755629221 |
| 2013 | 1285357197 |
| 2012 | 2542616037 |
| 2011 | 1092346139 |
Insight: This query shows both the volume of movies released and their total gross revenue per year.