hw2

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readr)
library(tidyr)
# Load the Netflix catalogue; the relative path is resolved against the
# current working directory.
data <- read_csv(file = "Netflix.csv")

## Rows: 6234 Columns: 12

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): type, title, director, cast, country, date_added, rating, duration...
## dbl  (2): show_id, release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Reshape to one row per (title, cast member). Titles with no cast listed are
# dropped first so they are not carried through the split.
data <- data %>%
  filter(!is.na(cast)) %>%
  separate_rows(cast, sep = ", ")

# Count appearances per cast member and keep the six most frequent.
# `slice_max()` names the ranking column explicitly; the superseded
# `top_n(6)` guessed it from the last column of the table. Ties at the cutoff
# are kept (same as `top_n()`), so more than six rows are possible.
result <- data %>%
  count(cast, name = "appearances", sort = TRUE) %>%
  slice_max(appearances, n = 6)

# Label the name column `actor` for presentation, then display the table.
result <- rename(result, actor = cast)
print(result)

## # A tibble: 6 × 2
##   actor            appearances
##   <chr>                  <int>
## 1 Anupam Kher               33
## 2 Shah Rukh Khan            30
## 3 Naseeruddin Shah          27
## 4 Om Puri                   27
## 5 Akshay Kumar              26
## 6 Yuki Kaji                 26

hw2

khongorzul

2023-10-10