library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(readr)
# Load the Netflix catalogue and reshape it to one row per cast member:
# every ", "-separated entry in `cast` is split onto its own row, rows whose
# `cast` is missing (NA) are dropped, and the column is renamed to `actor`.
Netflix <- read_csv("Netflix.csv", show_col_types = FALSE) %>%
  separate_rows(cast, sep = ", ") %>%
  drop_na(cast) %>%
  rename(actor = cast)
# Count how many rows each actor appears in and keep the six most frequent
# actors, ordered from most to fewest appearances.
#
# `count(sort = TRUE)` is the idiomatic form of
# group_by() + summarize(n()) + arrange(desc()), and returns an ungrouped
# tibble with columns `actor` and `appearances`.
# `slice_max()` replaces the superseded `top_n()`: the ranking column is named
# explicitly rather than picked implicitly as the last column (which is what
# triggered the "Selecting by appearances" message).
# `with_ties = TRUE` matches top_n()'s behaviour: actors tied with sixth place
# are all kept, so the result can hold more than six rows.
actor_appearances <- Netflix %>%
  count(actor, name = "appearances", sort = TRUE) %>%
  slice_max(appearances, n = 6, with_ties = TRUE)
print(actor_appearances)
## # A tibble: 6 × 2
## actor appearances
## <chr> <int>
## 1 Anupam Kher 33
## 2 Shah Rukh Khan 30
## 3 Naseeruddin Shah 27
## 4 Om Puri 27
## 5 Akshay Kumar 26
## 6 Yuki Kaji 26