library(dplyr)
library(tidyr)
library(ggplot2)
# Load the Netflix catalogue from Netflix.csv in the working directory.
netflix <- read.csv("Netflix.csv")

# Count cast entries per actor across rows whose type is "TV Show".
# `cast` is split on commas into one row per (title, cast member).
actor_counts <- netflix %>%
  filter(type == "TV Show") %>%
  select(title, cast) %>%
  separate_rows(cast, sep = ",") %>%
  rename(actor = cast) %>%
  # Splitting on "," leaves the space that follows each comma attached to the
  # name, so "X" and " X" would be tallied as two different actors.
  mutate(actor = trimws(actor)) %>%
  # Titles with no cast listed would otherwise show up as a "" pseudo-actor
  # (and rank first in the counts).
  filter(!is.na(actor), actor != "") %>%
  count(actor, sort = TRUE)

# Print the results
actor_counts

# NOTE: the output below was captured BEFORE the trimming / empty-name
# filtering above was added. It shows the two problems those steps address:
# the "" row at the top and names carrying a leading space. Re-run the script
# to refresh it.
## # A tibble: 11,871 × 2
## actor n
## <chr> <int>
## 1 "" 210
## 2 " Takahiro Sakurai" 18
## 3 " Yuki Kaji" 14
## 4 "David Attenborough" 14
## 5 " Tomokazu Sugita" 12
## 6 " Ai Kayano" 11
## 7 " Daisuke Ono" 11
## 8 " Junichi Suwabe" 10
## 9 " Ashleigh Ball" 9
## 10 " Hiroshi Kamiya" 9
## # ℹ 11,861 more rows
# Horizontal bar chart of the ten most frequent entries in actor_counts.
# actor_counts is already sorted by descending n, so its first ten rows are
# the top ten.
top_actors <- head(actor_counts, n = 10)

# reorder() sorts the bars by count; coord_flip() puts the names on the
# vertical axis.
ggplot(top_actors, aes(x = reorder(actor, n), y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Actors in Netflix TV Shows",
    x = "Actor",
    y = "Number of Appearances"
  ) +
  theme_minimal()
