library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Read the Netflix catalogue from disk and keep only the TV-show titles
netflix_data <- read.csv("Netflix.csv")
# Qualified call makes it explicit that this is dplyr's row filter,
# not stats::filter(), which dplyr masks when attached.
tv_shows <- dplyr::filter(netflix_data, type == "TV Show")
## Reshape to one row per (title, actor): the "cast" column holds a
## comma-separated list of names, which is split into individual rows.
tv_shows <- tv_shows %>%
  # Rename first so the split operates directly on the final column name.
  rename(actor = cast) %>%
  separate_rows(actor, sep = ", ")
## Top 6 actors with the most TV-show appearances
##
## Produces one row per actor with the number of rows (appearances) in `n`,
## ordered from most to fewest. Ties at the cut-off are kept, so the result
## can contain more than 6 rows when several actors share the 6th-highest
## count.
top_actors <- tv_shows %>%
  # Titles without cast information yield a blank actor after the split;
  # exclude them here so the ranking contains real names only. The filter is
  # local to this pipeline, so `tv_shows` itself is left unchanged.
  filter(!is.na(actor), trimws(actor) != "") %>%
  # count() does its own grouping and returns an ungrouped result. A prior
  # group_by(actor) would leave the counts grouped one row per actor, making
  # the top-n step run inside every group and return every actor.
  count(actor, sort = TRUE) %>%
  # slice_max() is the current replacement for the superseded top_n().
  slice_max(n, n = 6)
## Result
# NOTE: any printed output captured below this call predates this fix;
# re-run the script to refresh it.
print(top_actors)
## # A tibble: 11,444 × 2
## # Groups: actor [11,444]
## actor n
## <chr> <int>
## 1 "" 210
## 2 "Takahiro Sakurai" 18
## 3 "Yuki Kaji" 16
## 4 "Daisuke Ono" 14
## 5 "David Attenborough" 14
## 6 "Ashleigh Ball" 12
## 7 "Hiroshi Kamiya" 12
## 8 "Jun Fukuyama" 12
## 9 "Tomokazu Sugita" 12
## 10 "Ai Kayano" 11
## # ℹ 11,434 more rows