Import Dataset
# Load the Netflix titles CSV from the working directory into a tibble.
dat <- read_csv(file = "netflix_titles.csv")
## Rows: 8807 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl (1): release_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to sanity-check the import.
head(dat, n = 6L)
## # A tibble: 6 × 12
## show_id type title director cast country date_added release_year rating
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 s1 Movie Dick Jo… Kirsten… <NA> United… September… 2020 PG-13
## 2 s2 TV Show Blood &… <NA> Ama … South … September… 2021 TV-MA
## 3 s3 TV Show Ganglan… Julien … Sami… <NA> September… 2021 TV-MA
## 4 s4 TV Show Jailbir… <NA> <NA> <NA> September… 2021 TV-MA
## 5 s5 TV Show Kota Fa… <NA> Mayu… India September… 2021 TV-MA
## 6 s6 TV Show Midnigh… Mike Fl… Kate… <NA> September… 2021 TV-MA
## # ℹ 3 more variables: duration <chr>, listed_in <chr>, description <chr>
Figure 2: Netflix Content Released Over Years
# Histogram of titles by release year, using one bar per calendar year.
ggplot(data = dat, mapping = aes(x = release_year)) +
  geom_histogram(fill = "steelblue", binwidth = 1) +
  ggtitle("Netflix Content Released Over Years") +
  xlab("Release Year") +
  ylab("Number of Titles")

Figure 3: Top Countries Producing Netflix Content
# Ten countries with the most titles, as a tibble with columns `country`, `n`.
#
# NOTE(review): `country` appears to hold comma-separated lists for
# co-productions (e.g. "United States, India") in the public Netflix titles
# data -- confirm against the file. Counting the raw string would rank each
# distinct combination as its own "country", so every title is first expanded
# to one row per individual country.
top_countries <- dat %>%
  drop_na(country) %>%
  # One row per (title, country); tolerate stray whitespace around commas.
  separate_rows(country, sep = "\\s*,\\s*") %>%
  mutate(country = trimws(country)) %>%
  # Leading/trailing commas in the raw field leave empty fragments behind.
  filter(country != "") %>%
  # Count a title at most once per country, even if it is listed twice.
  distinct(show_id, country) %>%
  count(country, sort = TRUE) %>%
  slice_head(n = 10)
# Horizontal bar chart of the top countries, ordered by title count so the
# largest bar ends up at the top once the axes are flipped.
ggplot(
  data = top_countries,
  mapping = aes(x = reorder(country, n), y = n)
) +
  geom_col(fill = "darkred") +
  coord_flip() +
  ggtitle("Top Countries Producing Netflix Content") +
  xlab("Country") +
  ylab("Number of Titles")
