Import Dataset

# Load the Netflix catalogue; readr infers each column's type from the file.
dat <- read_csv(file = "netflix_titles.csv")

## Rows: 8807 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): show_id, type, title, director, cast, country, date_added, rating,...
## dbl  (1): release_year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Preview the first six rows to sanity-check the import.
head(dat, n = 6)

## # A tibble: 6 × 12
##   show_id type    title    director cast  country date_added release_year rating
##   <chr>   <chr>   <chr>    <chr>    <chr> <chr>   <chr>             <dbl> <chr> 
## 1 s1      Movie   Dick Jo… Kirsten… <NA>  United… September…         2020 PG-13 
## 2 s2      TV Show Blood &… <NA>     Ama … South … September…         2021 TV-MA 
## 3 s3      TV Show Ganglan… Julien … Sami… <NA>    September…         2021 TV-MA 
## 4 s4      TV Show Jailbir… <NA>     <NA>  <NA>    September…         2021 TV-MA 
## 5 s5      TV Show Kota Fa… <NA>     Mayu… India   September…         2021 TV-MA 
## 6 s6      TV Show Midnigh… Mike Fl… Kate… <NA>    September…         2021 TV-MA 
## # ℹ 3 more variables: duration <chr>, listed_in <chr>, description <chr>

Figure 1: Movies vs TV Shows

# Figure 1: one bar per content type, coloured by that same type.
ggplot(data = dat, mapping = aes(x = type, fill = type)) +
  geom_bar() + # geom_bar() tallies the rows in each category itself
  ggtitle("Movies vs TV Shows on Netflix") +
  xlab("Content Type") +
  ylab("Count")

Figure 2: Netflix Content Released Over Years

# Figure 2: number of titles per release year, using one histogram bin per
# year (binwidth = 1).
ggplot(data = dat, mapping = aes(x = release_year)) +
  geom_histogram(fill = "steelblue", binwidth = 1) +
  ggtitle("Netflix Content Released Over Years") +
  xlab("Release Year") +
  ylab("Number of Titles")

Figure 3: Top Countries Producing Netflix Content

# Figure 3: the ten countries credited on the most titles.
#
# `country` may list several co-producing countries in a single
# comma-separated string (NOTE(review): confirm against the raw CSV). Counting
# the raw strings ranks *combinations* of countries, so each entry is split
# first and a title contributes once to every country it lists.
top_countries <- dat %>%
  drop_na(country) %>%
  separate_rows(country, sep = ",") %>%
  mutate(country = trimws(country)) %>%
  # A stray leading/trailing comma would leave an empty piece behind.
  filter(country != "") %>%
  count(country, sort = TRUE) %>%
  head(10)

# reorder() sorts the bars by count; coord_flip() makes them horizontal so the
# country names stay readable.
ggplot(top_countries, aes(x = reorder(country, n), y = n)) +
  geom_col(fill = "darkred") +
  coord_flip() +
  labs(
    title = "Top Countries Producing Netflix Content",
    x = "Country",
    y = "Number of Titles"
  )

Figure 4: Ratings Distribution

# Figure 4: number of titles per rating label, one coloured bar per label.
ggplot(data = dat, mapping = aes(x = rating, fill = rating)) +
  geom_bar() +
  ggtitle("Distribution of Netflix Ratings") +
  xlab("Rating") +
  ylab("Count") +
  # Tilt the tick labels so neighbouring rating codes do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Figure 5: Most Common Genres

# Figure 5: the ten most common individual genres.
#
# `listed_in` may hold several genres in one comma-separated string
# (NOTE(review): confirm against the raw CSV). Counting the raw strings ranks
# genre *combinations*, so each entry is split first and a title contributes
# once to every genre it is listed under.
top_genres <- dat %>%
  drop_na(listed_in) %>%
  separate_rows(listed_in, sep = ",") %>%
  mutate(listed_in = trimws(listed_in)) %>%
  # A stray leading/trailing comma would leave an empty piece behind.
  filter(listed_in != "") %>%
  count(listed_in, sort = TRUE) %>%
  head(10)

# reorder() sorts the bars by count; coord_flip() makes them horizontal so the
# genre names stay readable.
ggplot(top_genres, aes(x = reorder(listed_in, n), y = n)) +
  geom_col(fill = "purple") +
  coord_flip() +
  labs(
    title = "Most Common Netflix Genres",
    x = "Genre",
    y = "Count"
  )

Figure 6: Movie Duration Distribution

# Figure 6: distribution of movie running times.
#
# `duration` is read as a character column, so plotting it directly draws one
# bar per distinct string in alphabetical order, which is not a distribution.
# Extract the numeric part into `duration_min` and plot that instead.
# NOTE(review): assumes movie durations are recorded in minutes — confirm
# against the raw CSV.
movie_data <- dat %>%
  filter(type == "Movie") %>%
  mutate(duration_min = readr::parse_number(duration))

# na.rm = TRUE silently skips movies whose duration is missing; 5-minute bins
# keep the shape visible without one bar per distinct value.
ggplot(movie_data, aes(x = duration_min)) +
  geom_histogram(binwidth = 5, fill = "orange", na.rm = TRUE) +
  labs(
    title = "Movie Duration Distribution",
    x = "Duration (minutes)",
    y = "Count"
  )

Figure 7: Top Directors on Netflix

# Figure 7: the ten most frequent `director` values, skipping titles that have
# no director recorded.
top_directors <- dat %>%
  filter(!is.na(director)) %>%
  count(director, sort = TRUE) %>%
  head(n = 10)

# Bars are ordered by count and flipped horizontal so the names are legible.
ggplot(
  data = top_directors,
  mapping = aes(x = reorder(director, n), y = n)
) +
  geom_col(fill = "darkgreen") +
  coord_flip() +
  ggtitle("Top Directors on Netflix") +
  xlab("Director") +
  ylab("Number of Titles")

Figure 8: Interactive Plotly Visualization

# Figure 8: build the static release-year histogram first...
p <- ggplot(data = dat, mapping = aes(x = release_year)) +
  geom_histogram(fill = "skyblue", binwidth = 1) +
  ggtitle("Interactive Netflix Release Trend") +
  xlab("Release Year") +
  ylab("Count")

# ...then hand the ggplot object to plotly to render it as an interactive plot.
ggplotly(p)

Netflix Content Analysis and Visualization

Nithin Pathikonda

Import Dataset

Figure 1: Movies vs TV Shows

Figure 2: Netflix Content Released Over Years

Figure 3: Top Countries Producing Netflix Content

Figure 4: Ratings Distribution

Figure 5: Most Common Genres

Figure 6: Movie Duration Distribution

Figure 7: Top Directors on Netflix

Figure 8: Interactive Plotly Visualization