Dataset

This analysis uses Netflix.csv.

Make sure the following files are in the same folder before knitting:

- Netflix.csv
- Top_6_Actors_Netflix_TV_Shows.Rmd

# Load the catalogue from the working directory; show_col_types = FALSE
# silences readr's column-specification message in the knitted output.
netflix <- read_csv(file = "Netflix.csv", show_col_types = FALSE)

# Print a compact column-by-column overview (name, type, first values).
netflix %>%
  glimpse()
## Rows: 6,234
## Columns: 12
## $ show_id      <dbl> 81145628, 80117401, 70234439, 80058654, 80125979, 8016389…
## $ type         <chr> "Movie", "Movie", "TV Show", "TV Show", "Movie", "TV Show…
## $ title        <chr> "Norm of the North: King Sized Adventure", "Jandino: What…
## $ director     <chr> "Richard Finn, Tim Maltby", NA, NA, NA, "Fernando Lebrija…
## $ cast         <chr> "Alan Marriott, Andrew Toth, Brian Dobson, Cole Howard, J…
## $ country      <chr> "United States, India, South Korea, China", "United Kingd…
## $ date_added   <chr> "September 9, 2019", "September 9, 2016", "September 8, 2…
## $ release_year <dbl> 2019, 2016, 2013, 2016, 2017, 2016, 2014, 2017, 2017, 201…
## $ rating       <chr> "TV-PG", "TV-MA", "TV-Y7-FV", "TV-Y7", "TV-14", "TV-MA", …
## $ duration     <chr> "90 min", "94 min", "1 Season", "1 Season", "99 min", "1 …
## $ listed_in    <chr> "Children & Family Movies, Comedies", "Stand-Up Comedy", …
## $ description  <chr> "Before planning an awesome wedding for his grandfather, …

Step 1: Filter only TV Shows

# Keep only the rows whose `type` is exactly "TV Show" (movies are dropped).
tv_shows <- filter(netflix, type == "TV Show")

Step 2: Separate actors so each actor has one row

# Reshape the comma-separated `cast` column into one row per (show, actor).
#
# Result: a tibble with columns `title` and `actor`, one row for each
# distinct actor credited on each show.
tv_actors <- tv_shows %>%
  # show_id is carried along so that two different shows sharing a title
  # are still treated as separate shows when de-duplicating below.
  select(show_id, title, cast) %>%
  # Shows without any cast information contribute no actors.
  filter(!is.na(cast)) %>%
  separate_rows(cast, sep = ",") %>%
  # Trim the ends and collapse repeated inner spaces so that spelling
  # variants such as "A  B" and "A B" are counted as the same person.
  mutate(actor = str_squish(cast)) %>%
  # A trailing or doubled comma in `cast` yields an empty name; drop it so
  # it is never counted as an actor.
  filter(actor != "") %>%
  # An actor listed more than once for the same show is one appearance.
  distinct(show_id, actor, .keep_all = TRUE) %>%
  select(title, actor)

Step 3: Count actor appearances in TV shows

# Tally the rows per actor into an `appearances` column and order the
# result from most to fewest appearances (sort = TRUE sorts descending).
actor_counts <- count(tv_actors, actor, name = "appearances", sort = TRUE)

# Display the full ranking.
actor_counts

Step 4: Top 6 actors with the most TV show appearances

# actor_counts is already sorted in descending order, so its first six rows
# are the six most frequent actors (fewer rows if fewer actors exist).
top_6_actors <- head(actor_counts, n = 6)

# Display the top six.
top_6_actors

(Optional) Visualization

# Horizontal bar chart of the top six actors. reorder() sorts the bars by
# appearance count, and coord_flip() turns the columns sideways so the
# actor names read horizontally along the vertical axis.
top_6_actors %>%
  ggplot(mapping = aes(x = reorder(actor, appearances), y = appearances)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Top 6 Actors With Most Netflix TV Show Appearances",
    x = "Actor",
    y = "Number of TV Shows"
  ) +
  coord_flip() +
  theme_minimal()