# Attach the packages this script relies on:
#   readr - read_csv()
#   dplyr - %>% and the data-manipulation verbs
#   tidyr - separate_rows(), drop_na()
library(readr)
library(dplyr)
library(tidyr)

# Folder where Netflix.csv is saved (change folder if needed). The path is
# handed straight to read_csv() rather than calling setwd(), so the session's
# working directory is left untouched.
netflix_dir <- "/Users/zaari/downloads"

# Load Netflix dataset
Netflix <- read_csv(file.path(netflix_dir, "Netflix.csv"))
## Rows: 6234 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): type, title, director, cast, country, date_added, rating, duration...
## dbl  (2): show_id, release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the column names of the imported table
names(Netflix)
##  [1] "show_id"      "type"         "title"        "director"     "cast"        
##  [6] "country"      "date_added"   "release_year" "rating"       "duration"    
## [11] "listed_in"    "description"
# Preview the first six rows of the imported table
head(Netflix, n = 6L)
## # A tibble: 6 × 12
##    show_id type    title   director cast  country date_added release_year rating
##      <dbl> <chr>   <chr>   <chr>    <chr> <chr>   <chr>             <dbl> <chr> 
## 1 81145628 Movie   Norm o… Richard… Alan… United… September…         2019 TV-PG 
## 2 80117401 Movie   Jandin… <NA>     Jand… United… September…         2016 TV-MA 
## 3 70234439 TV Show Transf… <NA>     Pete… United… September…         2013 TV-Y7…
## 4 80058654 TV Show Transf… <NA>     Will… United… September…         2016 TV-Y7 
## 5 80125979 Movie   #reali… Fernand… Nest… United… September…         2017 TV-14 
## 6 80163890 TV Show Apaches <NA>     Albe… Spain   September…         2016 TV-MA 
## # ℹ 3 more variables: duration <chr>, listed_in <chr>, description <chr>
# Reshape to one row per listed cast member: rename 'cast' to 'actor', drop
# titles with no cast information, then split the ", "-separated names so
# each actor gets its own row.
Netflix_Actor <- Netflix %>%
  rename(actor = cast) %>%
  drop_na(actor) %>%
  separate_rows(actor, sep = ", ")

# Preview the reshaped table
head(Netflix_Actor, n = 6L)
## # A tibble: 6 × 12
##    show_id type  title     director actor country date_added release_year rating
##      <dbl> <chr> <chr>     <chr>    <chr> <chr>   <chr>             <dbl> <chr> 
## 1 81145628 Movie Norm of … Richard… Alan… United… September…         2019 TV-PG 
## 2 81145628 Movie Norm of … Richard… Andr… United… September…         2019 TV-PG 
## 3 81145628 Movie Norm of … Richard… Bria… United… September…         2019 TV-PG 
## 4 81145628 Movie Norm of … Richard… Cole… United… September…         2019 TV-PG 
## 5 81145628 Movie Norm of … Richard… Jenn… United… September…         2019 TV-PG 
## 6 81145628 Movie Norm of … Richard… Jona… United… September…         2019 TV-PG 
## # ℹ 3 more variables: duration <chr>, listed_in <chr>, description <chr>
# Find the 6 actors with the most TV show appearances.
# count(actor, sort = TRUE) tallies the rows per actor and orders them by
# descending n in a single step, so no separate group_by()/ungroup() or
# select() is needed.
# Note: head(6) keeps exactly six rows, so any actors tied with the
# sixth-place count are not shown.
Top_Actors <- Netflix_Actor %>%
  filter(type == "TV Show") %>%
  count(actor, sort = TRUE) %>%
  head(6)

Top_Actors
## # A tibble: 6 × 2
##   actor                  n
##   <chr>              <int>
## 1 Takahiro Sakurai      18
## 2 Yuki Kaji             16
## 3 Daisuke Ono           14
## 4 David Attenborough    14
## 5 Ashleigh Ball         12
## 6 Hiroshi Kamiya        12