library(tidyverse)
library(knitr)
# Load Netflix.csv into a data frame; text columns stay character vectors
Netflix <- read.csv(file = "Netflix.csv", stringsAsFactors = FALSE)
# Print a compact overview of every column and its type
str(Netflix)
## 'data.frame': 6234 obs. of 12 variables:
## $ show_id : int 81145628 80117401 70234439 80058654 80125979 80163890 70304989 80164077 80117902 70304990 ...
## $ type : chr "Movie" "Movie" "TV Show" "TV Show" ...
## $ title : chr "Norm of the North: King Sized Adventure" "Jandino: Whatever it Takes" "Transformers Prime" "Transformers: Robots in Disguise" ...
## $ director : chr "Richard Finn, Tim Maltby" "" "" "" ...
## $ cast : chr "Alan Marriott, Andrew Toth, Brian Dobson, Cole Howard, Jennifer Cameron, Jonathan Holmes, Lee Tockar, Lisa Duru"| __truncated__ "Jandino Asporaat" "Peter Cullen, Sumalee Montano, Frank Welker, Jeffrey Combs, Kevin Michael Richardson, Tania Gunadi, Josh Keaton"| __truncated__ "Will Friedle, Darren Criss, Constance Zimmer, Khary Payton, Mitchell Whitfield, Stuart Allan, Ted McGinley, Peter Cullen" ...
## $ country : chr "United States, India, South Korea, China" "United Kingdom" "United States" "United States" ...
## $ date_added : chr "September 9, 2019" "September 9, 2016" "September 8, 2018" "September 8, 2018" ...
## $ release_year: int 2019 2016 2013 2016 2017 2016 2014 2017 2017 2014 ...
## $ rating : chr "TV-PG" "TV-MA" "TV-Y7-FV" "TV-Y7" ...
## $ duration : chr "90 min" "94 min" "1 Season" "1 Season" ...
## $ listed_in : chr "Children & Family Movies, Comedies" "Stand-Up Comedy" "Kids' TV" "Kids' TV" ...
## $ description : chr "Before planning an awesome wedding for his grandfather, a polar bear king must take back a stolen artifact from"| __truncated__ "Jandino Asporaat riffs on the challenges of raising kids and serenades the audience with a rousing rendition of"| __truncated__ "With the help of three human allies, the Autobots once again protect Earth from the onslaught of the Decepticon"| __truncated__ "When a prison ship crash unleashes hundreds of Decepticons on Earth, Bumblebee leads a new Autobot force to protect humankind." ...
The cast column contains multiple actors separated by commas. We need to transform the data so that each actor has their own row.
# Reshape to one row per (title, actor) pair and rename `cast` to `actor`.
# read.csv() loads a missing cast as "" rather than NA, so drop_na() alone
# would keep those titles as rows with an empty actor name. Names are trimmed
# and empty strings are converted to NA first, so that drop_na() really
# removes every row without an actor.
Netflix_Actor <- Netflix %>%
  separate_rows(cast, sep = ", ") %>%
  mutate(cast = na_if(str_trim(cast), "")) %>%
  drop_na(cast) %>%
  rename(actor = cast)
# Report how many rows and columns the per-actor table has
cat(
  "Transformed dataset dimensions:", nrow(Netflix_Actor), "rows and",
  ncol(Netflix_Actor), "columns\n"
)
## Transformed dataset dimensions: 44881 rows and 12 columns
# Preview the first ten actor/title rows as a formatted table
Netflix_Actor %>%
  select(type, actor, title) %>%
  head(10) %>%
  kable(caption = "Sample of transformed data with individual actors")
| type | actor | title |
|---|---|---|
| Movie | Alan Marriott | Norm of the North: King Sized Adventure |
| Movie | Andrew Toth | Norm of the North: King Sized Adventure |
| Movie | Brian Dobson | Norm of the North: King Sized Adventure |
| Movie | Cole Howard | Norm of the North: King Sized Adventure |
| Movie | Jennifer Cameron | Norm of the North: King Sized Adventure |
| Movie | Jonathan Holmes | Norm of the North: King Sized Adventure |
| Movie | Lee Tockar | Norm of the North: King Sized Adventure |
| Movie | Lisa Durupt | Norm of the North: King Sized Adventure |
| Movie | Maya Kay | Norm of the North: King Sized Adventure |
| Movie | Michael Dobson | Norm of the North: King Sized Adventure |
Now we’ll identify the six actors who appear most frequently in TV shows.
# Find the 6 actors with the most appearances in TV shows.
# The result has two columns: `actor` and `n` (number of TV-show rows for
# that actor), sorted by `n` in descending order.
Top_6_Actors <- Netflix_Actor %>%
  # Skip blank or missing names: titles with no listed cast would otherwise
  # be tallied together as one unnamed "actor" and top the ranking.
  filter(type == "TV Show", !is.na(actor), actor != "") %>%
  # count() groups, tallies, sorts and ungroups in a single step
  count(actor, sort = TRUE) %>%
  slice_head(n = 6)
# Render the ranking as a table with reader-friendly column headers
kable(
  Top_6_Actors,
  col.names = c("Actor", "Number of TV Shows"),
  caption = "Top 6 Actors with Most Appearances in Netflix TV Shows"
)
| Actor | Number of TV Shows |
|---|---|
|  | 210 |
| Takahiro Sakurai | 18 |
| Yuki Kaji | 16 |
| Daisuke Ono | 14 |
| David Attenborough | 14 |
| Ashleigh Ball | 12 |
Let’s create a bar chart to visualize these top 6 actors.
# Horizontal bar chart of the top six actors, ordered by appearance count
ggplot(
  data = Top_6_Actors,
  mapping = aes(x = reorder(actor, n), y = n, fill = actor)
) +
  geom_col(show.legend = FALSE) +
  # Print each count just past the end of its bar
  geom_text(mapping = aes(label = n), size = 4, hjust = -0.3) +
  scale_fill_brewer(palette = "Set3") +
  # Leave 10% headroom on the count axis so the labels are not clipped
  scale_y_continuous(limits = c(0, max(Top_6_Actors$n) * 1.1)) +
  # Flip so actor names read horizontally along the vertical axis
  coord_flip() +
  labs(
    title = "Top 6 Actors with Most Appearances in Netflix TV Shows",
    subtitle = "Based on Netflix dataset analysis",
    x = "Actor",
    y = "Number of TV Show Appearances"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12, face = "bold")
  )
Analysis Date: 2025-10-06