This analysis uses Netflix.csv.
Make sure the following files are in the same folder
before knitting:

- Netflix.csv
- Top_6_Actors_Netflix_TV_Shows.Rmd
# Load the catalogue from Netflix.csv (path is relative to the working
# directory); show_col_types = FALSE silences readr's column-spec message.
netflix <- read_csv(file = "Netflix.csv", show_col_types = FALSE)

# Print a compact, one-line-per-column overview of the imported data.
netflix %>%
  glimpse()
## Rows: 6,234
## Columns: 12
## $ show_id <dbl> 81145628, 80117401, 70234439, 80058654, 80125979, 8016389…
## $ type <chr> "Movie", "Movie", "TV Show", "TV Show", "Movie", "TV Show…
## $ title <chr> "Norm of the North: King Sized Adventure", "Jandino: What…
## $ director <chr> "Richard Finn, Tim Maltby", NA, NA, NA, "Fernando Lebrija…
## $ cast <chr> "Alan Marriott, Andrew Toth, Brian Dobson, Cole Howard, J…
## $ country <chr> "United States, India, South Korea, China", "United Kingd…
## $ date_added <chr> "September 9, 2019", "September 9, 2016", "September 8, 2…
## $ release_year <dbl> 2019, 2016, 2013, 2016, 2017, 2016, 2014, 2017, 2017, 201…
## $ rating <chr> "TV-PG", "TV-MA", "TV-Y7-FV", "TV-Y7", "TV-14", "TV-MA", …
## $ duration <chr> "90 min", "94 min", "1 Season", "1 Season", "99 min", "1 …
## $ listed_in <chr> "Children & Family Movies, Comedies", "Stand-Up Comedy", …
## $ description <chr> "Before planning an awesome wedding for his grandfather, …
# Keep only the rows whose `type` is exactly "TV Show" (movies are dropped).
tv_shows <- filter(netflix, type == "TV Show")
# The `cast` column contains multiple actors separated by commas.
# Build a long table with one row per (title, actor) pair:
#   1. keep only the title and its cast string,
#   2. drop shows with no cast information,
#   3. split the comma-separated cast into one row per name,
#   4. trim the whitespace left around each name after the split,
#   5. drop the raw `cast` column so only `title` and `actor` remain.
tv_actors <- tv_shows %>%
  select(title, cast) %>%
  filter(!is.na(cast)) %>%
  separate_rows(cast, sep = ",") %>%
  mutate(actor = str_trim(cast)) %>%
  select(-cast)
# Tally the rows per actor into an `appearances` column and order the result
# from most to fewest appearances (sort = TRUE sorts descending by the count).
actor_counts <- count(
  tv_actors,
  actor,
  name = "appearances",
  sort = TRUE
)

# Display the full ranking.
actor_counts
# Take the first six rows of the ranking. This relies on `actor_counts`
# already being ordered by descending `appearances`.
top_6_actors <- head(actor_counts, n = 6)

# Display the six selected actors.
top_6_actors
# Horizontal bar chart of the six selected actors.
# reorder() sorts the bars by appearance count; coord_flip() turns the
# columns sideways so the actor names sit on the vertical axis.
top_6_actors %>%
  ggplot(
    mapping = aes(
      x = reorder(actor, appearances),
      y = appearances
    )
  ) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 6 Actors With Most Netflix TV Show Appearances",
    x = "Actor",
    y = "Number of TV Shows"
  ) +
  theme_minimal()