Read the Dataset

library(tidyverse)
library(ggplot2)
library(dplyr)
# Load the titles table directly from the zipped CSV file.
netflix <- read_csv(file = "netflix_titles.csv.zip")

Cleaning the Dataset

# Keep only the rows that have no missing value in any column.
netflix_clean <- na.omit(netflix)

# Report how many rows remain, using a thousands separator.
paste(
  "After removing missing values, there are",
  format(nrow(netflix_clean), big.mark = ","),
  "rows."
)
## [1] "After removing missing values, there are 5,332 rows."

Number of Movies and TV Shows

# Count how many titles fall under each value of `type`.
categoryamount <- table(netflix_clean$type)

# Look the counts up by label instead of by position, so the sentence cannot
# silently attach a number to the wrong category if the set of types changes,
# and apply the same thousands separator to both counts.
# NOTE(review): assumes the labels are exactly "Movie" and "TV Show" -- confirm
# against unique(netflix_clean$type).
paste(
  "There are", prettyNum(categoryamount[["Movie"]], big.mark = ","),
  "Movies and", prettyNum(categoryamount[["TV Show"]], big.mark = ","),
  "TV Shows."
)
## [1] "There are 5,185 Movies and 147 TV Shows."

Graph of Netflix Titles Released Per Year

# Histogram of `release_year` using bins that are one unit wide.
ggplot(data = netflix_clean, mapping = aes(x = release_year)) +
  geom_histogram(binwidth = 1, fill = "skyblue", color = "black") +
  labs(
    title = "Number of Netflix Titles Released per Year",
    x = "Release Year",
    y = "Count"
  )

Top 5 Countries with the Most Netflix Titles

# Tally titles per distinct `country` value, most frequent first.
# NOTE(review): every distinct string is counted as-is; if a cell can list
# several countries, each combination is tallied separately -- confirm.
country_counts <- sort(table(netflix_clean$country), decreasing = TRUE)
top5_countries <- head(country_counts, 5)
top5_df <- data.frame(
  country = names(top5_countries),
  n = as.numeric(top5_countries)
)

# Order the bars by count: a plain character axis is sorted alphabetically,
# which hides the ranking. reorder() sorts ascending, so after coord_flip()
# the largest count ends up at the top. The fill legend is switched off
# because it only repeats the axis labels.
ggplot(top5_df, aes(x = reorder(country, n), y = n, fill = country)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Top 5 Countries with the Most Netflix Titles",
    x = "Country",
    y = "Number of Titles"
  )