# Load Required Libraries ----
library(tidyverse)
# Load the Dataset ----
# Report the directory R is running in, so the relative CSV path used below
# can be checked against it.
cat(paste("Current working directory:", getwd(), "\n"))
## Recorded output:
## Current working directory: /cloud/project

# Show which CSV files sit in that directory.
cat("\nCSV files found:\n")
## Recorded output:
##
## CSV files found:
print(list.files(path = ".", pattern = "\\.csv$"))
## Recorded output:
## [1] "Netflix.csv"

# Load the Netflix data into `Netflix`.
# The path is relative to the working directory printed above; edit it here
# if the file lives somewhere else.
Netflix <- read.csv(file = "Netflix.csv", header = TRUE)
# Find Top 6 Actors in TV Shows ----
# Find the six actors with the most TV-show rows in `Netflix_Actor`.
#
# NOTE(review): `Netflix_Actor` is not created anywhere in the visible script
# (only `Netflix` is read in). Confirm that it is built, with `type` and
# `actor` columns, before this step runs.
top_actors <- Netflix_Actor %>%
  select(type, actor) %>%
  filter(
    type == "TV Show",
    # Rows without an actor name are not an actor: the recorded run ranked the
    # blank name "" first with 210 rows, so drop missing and blank names.
    !is.na(actor),
    trimws(actor) != ""
  ) %>%
  group_by(actor) %>%
  count(sort = TRUE) %>%
  ungroup() %>%
  head(6)

# Display the results
print(top_actors)
## Recorded output of an earlier run, with its first row (the blank name "",
## n = 210) left out because blank names are now excluded. Re-run to see
## which actor takes the sixth place.
##   actor                    n
##   "Takahiro Sakurai"      18
##   "Yuki Kaji"             16
##   "Daisuke Ono"           14
##   "David Attenborough"    14
##   "Ashleigh Ball"         12
# Visualization ----
# Horizontal bar chart of `top_actors` (columns `actor` and `n`).
# Bars are ordered by `n`, filled on a blue-to-red gradient by `n`, and each
# bar carries a "<n> shows" text label.
ggplot(top_actors, aes(x = reorder(actor, n), y = n)) +
  geom_col(aes(fill = n), width = 0.7, show.legend = FALSE) +
  # A negative hjust pushes each label just past the end of its bar.
  geom_text(
    aes(label = paste0(n, " shows")),
    hjust = -0.1, size = 5, fontface = "bold", color = "#2c3e50"
  ) +
  # Flip so the actor names read horizontally along the vertical axis.
  coord_flip() +
  scale_fill_gradient(low = "#3498db", high = "#e74c3c") +
  # No padding at the zero end; 15% extra at the far end leaves room for the
  # text labels drawn beyond the bars.
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "Netflix's Most Prolific TV Show Actors",
    subtitle = "Top 6 actors by number of TV show appearances",
    x = NULL,
    y = "Number of TV Show Appearances",
    caption = "Data Source: Netflix Dataset | Analysis by Solongo"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(
      hjust = 0.5, face = "bold", size = 20,
      color = "#2c3e50", margin = margin(b = 5)
    ),
    plot.subtitle = element_text(
      hjust = 0.5, size = 14,
      color = "#7f8c8d", margin = margin(b = 20)
    ),
    plot.caption = element_text(
      size = 10, color = "#95a5a6",
      hjust = 1, margin = margin(t = 15)
    ),
    axis.text.y = element_text(size = 13, face = "bold", color = "#34495e"),
    axis.text.x = element_text(size = 11, color = "#7f8c8d"),
    axis.title.x = element_text(
      size = 13, face = "bold",
      color = "#2c3e50", margin = margin(t = 10)
    ),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    # `linewidth` replaces the `size` argument that ggplot2 3.4.0 deprecated
    # for line elements.
    panel.grid.major.x = element_line(color = "#ecf0f1", linewidth = 0.5),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#f8f9fa", color = NA),
    plot.margin = margin(20, 20, 20, 20)
  )
