library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(tidyr)
# Load the catalogue from the working directory into a tibble; column types
# are guessed by readr.
data <- readr::read_csv(file = "Netflix.csv")
## Rows: 6234 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): type, title, director, cast, country, date_added, rating, duration...
## dbl (2): show_id, release_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Reshape to one row per (title, performer) pair.
# Rows whose `cast` is missing are discarded up front: they cannot yield any
# performer, so removing them before the split gives the same rows as
# removing them afterwards.
data <- data %>%
  drop_na(cast) %>%
  separate_rows(cast, sep = ", ")
# Tally how many rows each performer appears in and keep the six highest.
#
# * `count(..., sort = TRUE)` replaces group_by() + summarize(n()) +
#   arrange(desc()) and returns an ungrouped tibble ordered by descending
#   `appearances`.
# * `slice_max()` replaces the superseded `top_n()`. The ranking column is
#   named explicitly rather than defaulting to "the last column", so the
#   result no longer depends on column order and no "Selecting by ..."
#   message is emitted.
# * `with_ties = TRUE` keeps the original tie behaviour: if several performers
#   share the cutoff count, all of them are returned, so the result can hold
#   more than six rows.
result <- data %>%
  count(cast, name = "appearances", sort = TRUE) %>%
  slice_max(appearances, n = 6, with_ties = TRUE)
# Relabel the `cast` column as `actor` for presentation; the counts are
# untouched.
result <- rename(result, actor = cast)

# Display the final table on the console.
print(result)
## # A tibble: 6 × 2
## actor appearances
## <chr> <int>
## 1 Anupam Kher 33
## 2 Shah Rukh Khan 30
## 3 Naseeruddin Shah 27
## 4 Om Puri 27
## 5 Akshay Kumar 26
## 6 Yuki Kaji 26