Ensure Folder Exists and Load Netflix Dataset

# Folder expected to contain Netflix.csv.
# Set the NETFLIX_DIR environment variable to point the script at another
# folder without editing the code; when it is unset or empty, the original
# machine-specific location is used.
folder_path <- Sys.getenv("NETFLIX_DIR")
if (!nzchar(folder_path)) {
  folder_path <- "/Users/faizhaikal/Desktop/Netflix"
}

# Create the folder if it doesn't exist. recursive = TRUE also creates any
# missing parent directories, which a plain dir.create() would silently fail
# on because warnings are suppressed here.
dir.create(folder_path, showWarnings = FALSE, recursive = TRUE)

# Full path of the CSV that is read below
Netflix_file <- file.path(folder_path, "Netflix.csv")

# Halt with instructions when the CSV has not been placed in the folder yet
if (!file.exists(Netflix_file)) {
  stop("Please move Netflix.csv to ", folder_path, call. = FALSE)
}

# Read CSV with UTF-8 encoding
Netflix <- read_csv(Netflix_file, locale = locale(encoding = "UTF-8"))
## Rows: 6234 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): type, title, director, cast, country, date_added, rating, duration...
## dbl  (2): show_id, release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Compact overview of the loaded data: one line per column with its type
# and first few values
dplyr::glimpse(Netflix)
## Rows: 6,234
## Columns: 12
## $ show_id      <dbl> 81145628, 80117401, 70234439, 80058654, 80125979, 8016389…
## $ type         <chr> "Movie", "Movie", "TV Show", "TV Show", "Movie", "TV Show…
## $ title        <chr> "Norm of the North: King Sized Adventure", "Jandino: What…
## $ director     <chr> "Richard Finn, Tim Maltby", NA, NA, NA, "Fernando Lebrija…
## $ cast         <chr> "Alan Marriott, Andrew Toth, Brian Dobson, Cole Howard, J…
## $ country      <chr> "United States, India, South Korea, China", "United Kingd…
## $ date_added   <chr> "September 9, 2019", "September 9, 2016", "September 8, 2…
## $ release_year <dbl> 2019, 2016, 2013, 2016, 2017, 2016, 2014, 2017, 2017, 201…
## $ rating       <chr> "TV-PG", "TV-MA", "TV-Y7-FV", "TV-Y7", "TV-14", "TV-MA", …
## $ duration     <chr> "90 min", "94 min", "1 Season", "1 Season", "99 min", "1 …
## $ listed_in    <chr> "Children & Family Movies, Comedies", "Stand-Up Comedy", …
## $ description  <chr> "Before planning an awesome wedding for his grandfather, …

Transform Data: Separate Actors

# Reshape to one row per (title, actor).
# Titles without any cast information are discarded first, then the
# comma-separated cast list is split so each name gets its own row; all
# other columns are repeated for every actor of the title.
Netflix_Actor <- Netflix %>%
  drop_na(cast) %>%
  rename(actor = cast) %>%
  separate_rows(actor, sep = ", ")

# Preview the first six rows of the reshaped data
head(Netflix_Actor, n = 6)
## # A tibble: 6 × 12
##    show_id type  title     director actor country date_added release_year rating
##      <dbl> <chr> <chr>     <chr>    <chr> <chr>   <chr>             <dbl> <chr> 
## 1 81145628 Movie Norm of … Richard… Alan… United… September…         2019 TV-PG 
## 2 81145628 Movie Norm of … Richard… Andr… United… September…         2019 TV-PG 
## 3 81145628 Movie Norm of … Richard… Bria… United… September…         2019 TV-PG 
## 4 81145628 Movie Norm of … Richard… Cole… United… September…         2019 TV-PG 
## 5 81145628 Movie Norm of … Richard… Jenn… United… September…         2019 TV-PG 
## 6 81145628 Movie Norm of … Richard… Jona… United… September…         2019 TV-PG 
## # ℹ 3 more variables: duration <chr>, listed_in <chr>, description <chr>

Filter TV Shows and Count Top 6 Actors

# Keep only TV-show rows, tally how many rows each actor has, order the
# tally from most to fewest, and retain the first six actors.
# count(actor, ...) groups by actor and returns an ungrouped tibble with
# the columns `actor` and `n`.
Top_Actors <- Netflix_Actor %>%
  filter(type == "TV Show") %>%
  count(actor, sort = TRUE) %>%
  head(n = 6)

Top_Actors
## # A tibble: 6 × 2
##   actor                  n
##   <chr>              <int>
## 1 Takahiro Sakurai      18
## 2 Yuki Kaji             16
## 3 Daisuke Ono           14
## 4 David Attenborough    14
## 5 Ashleigh Ball         12
## 6 Hiroshi Kamiya        12

Visualize Top 6 Actors

# Horizontal bar chart of the appearance counts in Top_Actors.
# reorder() sorts the bars by count; the fill legend is hidden because the
# axis labels already name each actor.
ggplot(Top_Actors, aes(x = reorder(actor, n), y = n, fill = actor)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 6 Actors in TV Shows",
    x = "Actor",
    y = "Number of Appearances"
  ) +
  theme_minimal() +
  theme(legend.position = "none")