# HW2

library(dplyr)
library(tidyr)
library(ggplot2)

# Read Netflix.csv (path is resolved relative to the working directory)
# into a data frame used by the rest of the script.
netflix <- read.csv(file = "Netflix.csv")

# Count how many TV-show rows each actor appears in.
#
# `cast` holds one comma-separated string of names per title. Splitting on
# "," alone leaves the space that follows each comma attached to every name
# except the first, so the same person would be tallied under two keys
# (e.g. " Yuki Kaji" and "Yuki Kaji"). Titles with no cast listed would also
# contribute an empty-string "actor". Names are therefore trimmed and
# blank/missing entries dropped before counting.
actor_counts <- netflix %>%
  filter(type == "TV Show") %>%
  select(title, cast) %>%
  separate_rows(cast, sep = ",") %>%
  rename(actor = cast) %>%
  mutate(actor = trimws(actor)) %>%
  filter(!is.na(actor), actor != "") %>%
  count(actor, sort = TRUE)

# Print the results (one row per actor, most frequent first)
actor_counts

## # A tibble: 11,871 × 2
##    actor                    n
##    <chr>                <int>
##  1 ""                     210
##  2 " Takahiro Sakurai"     18
##  3 " Yuki Kaji"            14
##  4 "David Attenborough"    14
##  5 " Tomokazu Sugita"      12
##  6 " Ai Kayano"            11
##  7 " Daisuke Ono"          11
##  8 " Junichi Suwabe"       10
##  9 " Ashleigh Ball"         9
## 10 " Hiroshi Kamiya"        9
## # ℹ 11,861 more rows

# Keep the first ten rows of actor_counts and draw them as a horizontal
# bar chart; reorder() sorts the bars by count.
top_actors <- head(actor_counts, 10)

ggplot(data = top_actors, mapping = aes(x = reorder(actor, n), y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Actors in Netflix TV Shows",
    x = "Actor",
    y = "Number of Appearances"
  ) +
  theme_minimal()

# HW2

# 2024-10-10