Load Required Libraries

library(tidyverse)

Load the Dataset

# Report the directory R resolves relative file paths against
cat("Current working directory:", getwd(), "\n")
## Current working directory: /cloud/project
# Show which CSV files are available in that directory
cat("\nCSV files found:\n")
## 
## CSV files found:
print(dir(pattern = "\\.csv$"))
## [1] "Netflix.csv"
# Import the Netflix dataset as a data frame
# (edit the file name below if the CSV lives somewhere else)
Netflix <- read.csv(file = "Netflix.csv")

Data Transformation

# Reshape to one row per (title, actor): split the comma-separated cast list,
# trim stray whitespace around each name, and drop titles without cast data.
# read.csv() reads an empty cast cell as "" (not NA), so blank names are
# converted to NA first; otherwise drop_na() keeps them and every title with
# no cast ends up credited to a single empty-named "actor".
Netflix_Actor <- Netflix %>%
  separate_rows(cast, sep = ", ") %>%
  mutate(cast = na_if(str_trim(cast), "")) %>%
  drop_na(cast) %>%
  rename(actor = cast)

# Display the transformed data structure
head(Netflix_Actor)
## # A tibble: 6 × 12
##    show_id type  title     director actor country date_added release_year rating
##      <int> <chr> <chr>     <chr>    <chr> <chr>   <chr>             <int> <chr> 
## 1 81145628 Movie Norm of … Richard… Alan… United… September…         2019 TV-PG 
## 2 81145628 Movie Norm of … Richard… Andr… United… September…         2019 TV-PG 
## 3 81145628 Movie Norm of … Richard… Bria… United… September…         2019 TV-PG 
## 4 81145628 Movie Norm of … Richard… Cole… United… September…         2019 TV-PG 
## 5 81145628 Movie Norm of … Richard… Jenn… United… September…         2019 TV-PG 
## 6 81145628 Movie Norm of … Richard… Jona… United… September…         2019 TV-PG 
## # ℹ 3 more variables: duration <chr>, listed_in <chr>, description <chr>

Find Top 6 Actors in TV Shows

# Rank actors by number of TV show credits and keep the six most frequent.
# Rows with a missing or blank actor are excluded first: read.csv() loads an
# empty cast cell as "" rather than NA, so titles without a cast list would
# otherwise be counted as one "actor" and take the top spot.
# NOTE: the captured output below predates this filter and still shows the
# blank "" row; re-run this chunk to refresh it.
top_actors <- Netflix_Actor %>%
  filter(type == "TV Show", !is.na(actor), str_trim(actor) != "") %>%
  count(actor, sort = TRUE) %>%  # ungrouped result, sorted by n descending
  slice_head(n = 6)

# Display the results
print(top_actors)
## # A tibble: 6 × 2
##   actor                    n
##   <chr>                <int>
## 1 ""                     210
## 2 "Takahiro Sakurai"      18
## 3 "Yuki Kaji"             16
## 4 "Daisuke Ono"           14
## 5 "David Attenborough"    14
## 6 "Ashleigh Ball"         12

Visualization

# Horizontal bar chart of the top actors, one bar per actor, ordered by count.
# Bars are coloured on a blue-to-red gradient by count and labelled with the
# number of shows just past the end of each bar.
ggplot(top_actors, aes(x = reorder(actor, n), y = n)) +
  geom_col(aes(fill = n), width = 0.7, show.legend = FALSE) +
  geom_text(aes(label = paste0(n, " shows")),
            hjust = -0.1, size = 5, fontface = "bold", color = "#2c3e50") +
  # Flip so actor names read horizontally on the vertical axis
  coord_flip() +
  scale_fill_gradient(low = "#3498db", high = "#e74c3c") +
  # Start bars at zero and leave 15% headroom so the text labels are not clipped
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "Netflix's Most Prolific TV Show Actors",
    subtitle = "Top 6 actors by number of TV show appearances",
    x = NULL,
    y = "Number of TV Show Appearances",
    caption = "Data Source: Netflix Dataset | Analysis by Solongo"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 20,
                              color = "#2c3e50", margin = margin(b = 5)),
    plot.subtitle = element_text(hjust = 0.5, size = 14,
                                 color = "#7f8c8d", margin = margin(b = 20)),
    plot.caption = element_text(size = 10, color = "#95a5a6",
                                hjust = 1, margin = margin(t = 15)),
    axis.text.y = element_text(size = 13, face = "bold", color = "#34495e"),
    axis.text.x = element_text(size = 11, color = "#7f8c8d"),
    axis.title.x = element_text(size = 13, face = "bold",
                                color = "#2c3e50", margin = margin(t = 10)),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0 and triggers a warning
    panel.grid.major.x = element_line(color = "#ecf0f1", linewidth = 0.5),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#f8f9fa", color = NA),
    plot.margin = margin(20, 20, 20, 20)
  )