# Point the R session at the folder that holds Netflix.csv
# (edit this path to match your machine).
setwd("/Users/zaari/downloads")
# Read the Netflix catalogue from the CSV file in that folder.
Netflix <- read_csv(file = "Netflix.csv")
## Rows: 6234 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): type, title, director, cast, country, date_added, rating, duration...
## dbl (2): show_id, release_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the column names of the loaded data
names(Netflix)
## [1] "show_id" "type" "title" "director" "cast"
## [6] "country" "date_added" "release_year" "rating" "duration"
## [11] "listed_in" "description"
# Preview the first six rows of the loaded data
head(Netflix, n = 6)
## # A tibble: 6 × 12
## show_id type title director cast country date_added release_year rating
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 81145628 Movie Norm o… Richard… Alan… United… September… 2019 TV-PG
## 2 80117401 Movie Jandin… <NA> Jand… United… September… 2016 TV-MA
## 3 70234439 TV Show Transf… <NA> Pete… United… September… 2013 TV-Y7…
## 4 80058654 TV Show Transf… <NA> Will… United… September… 2016 TV-Y7
## 5 80125979 Movie #reali… Fernand… Nest… United… September… 2017 TV-14
## 6 80163890 TV Show Apaches <NA> Albe… Spain September… 2016 TV-MA
## # ℹ 3 more variables: duration <chr>, listed_in <chr>, description <chr>
# Build a long table with one row per (title, actor) pair:
# discard titles with no cast listed, rename `cast` to `actor`, then split the
# ", "-separated names so that every actor sits on their own row.
Netflix_Actor <- Netflix %>%
  filter(!is.na(cast)) %>%
  rename(actor = cast) %>%
  separate_rows(actor, sep = ", ")
# Preview the reshaped data
head(Netflix_Actor, n = 6)
## # A tibble: 6 × 12
## show_id type title director actor country date_added release_year rating
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 81145628 Movie Norm of … Richard… Alan… United… September… 2019 TV-PG
## 2 81145628 Movie Norm of … Richard… Andr… United… September… 2019 TV-PG
## 3 81145628 Movie Norm of … Richard… Bria… United… September… 2019 TV-PG
## 4 81145628 Movie Norm of … Richard… Cole… United… September… 2019 TV-PG
## 5 81145628 Movie Norm of … Richard… Jenn… United… September… 2019 TV-PG
## 6 81145628 Movie Norm of … Richard… Jona… United… September… 2019 TV-PG
## # ℹ 3 more variables: duration <chr>, listed_in <chr>, description <chr>
# Find the six actors who appear in the most TV shows.
# `count(actor, sort = TRUE)` groups by actor, tallies the rows, and orders the
# result by descending `n` in a single step, so no separate select(),
# group_by(), or ungroup() is needed.
# NOTE: only the first six rows are kept, so actors tied with the sixth row's
# count are cut off; use slice_max(n, n = 6) instead if ties should be kept.
Top_Actors <- Netflix_Actor %>%
  filter(type == "TV Show") %>%
  count(actor, sort = TRUE) %>%
  head(n = 6)
# Print the result
Top_Actors
## # A tibble: 6 × 2
## actor n
## <chr> <int>
## 1 Takahiro Sakurai 18
## 2 Yuki Kaji 16
## 3 Daisuke Ono 14
## 4 David Attenborough 14
## 5 Ashleigh Ball 12
## 6 Hiroshi Kamiya 12