This document analyzes the top actors in TV shows from the Netflix dataset.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
Netflix <- read.csv("/cloud/project/Netflix.csv")
Netflix_Actor <- Netflix %>%
separate_rows(cast, sep = ", ") %>%
drop_na(cast) %>%
rename(actor = cast)
top_actors <- Netflix_Actor %>%
filter(type == "TV Show") %>%
group_by(actor) %>%
summarise(appearances = n()) %>%
arrange(desc(appearances)) %>%
head(6)
print(top_actors)
## # A tibble: 6 × 2
## actor appearances
## <chr> <int>
## 1 "" 210
## 2 "Takahiro Sakurai" 18
## 3 "Yuki Kaji" 16
## 4 "Daisuke Ono" 14
## 5 "David Attenborough" 14
## 6 "Ashleigh Ball" 12
print(top_actors)
## # A tibble: 6 × 2
## actor appearances
## <chr> <int>
## 1 "" 210
## 2 "Takahiro Sakurai" 18
## 3 "Yuki Kaji" 16
## 4 "Daisuke Ono" 14
## 5 "David Attenborough" 14
## 6 "Ashleigh Ball" 12