Loading the Dataset
# Read the Netflix Originals CSV into a tibble; readr guesses the column types.
netflix <- read_csv(file = "./netflix-data/NetflixOriginals.csv")
## Rows: 584 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Title, Genre, Premiere, Language
## dbl (2): Runtime, IMDB Score
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the data.
head(netflix, n = 6L)
## # A tibble: 6 × 6
## Title Genre Premiere Runtime `IMDB Score` Language
## <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 Enter the Anime Documentary August 5,… 58 2.5 English…
## 2 Dark Forces Thriller August 21… 81 2.6 Spanish
## 3 The App Science fiction/Drama December … 79 2.6 Italian
## 4 The Open House Horror thriller January 1… 94 3.2 English
## 5 Kaali Khuhi Mystery October 3… 90 3.4 Hindi
## 6 Drive Action November … 147 3.5 Hindi
# Report the class of every column, one row per column.
# vapply() always yields a character vector; sapply() would silently switch to
# a list if any column carried more than one class. Only the first class of
# each column is reported.
as_tibble(vapply(netflix, function(col) class(col)[1], character(1)))
## # A tibble: 6 × 1
## value
## <chr>
## 1 character
## 2 character
## 3 character
## 4 numeric
## 5 numeric
## 6 character
# Parse the premiere strings into dates (month-day-year order) and derive the
# calendar parts from the parsed date:
#   Released - parsed premiere date
#   Year     - year of release
#   Month    - month of release, as a label
#   Date     - day of the month
#   Day      - weekday of release, as an unabbreviated label
netflix <- mutate(
  netflix,
  Released = mdy(Premiere),
  Year = year(Released),
  Month = month(Released, label = TRUE),
  Date = day(Released),
  Day = wday(Released, label = TRUE, abbr = FALSE)
)
Data Visualisation
Number of Movies Released Each Year
# Number of films per release year.
n <- netflix %>% count(Year, name = "total")

# Bar chart of films per year. The year with the most releases is tagged
# "red" and all others "grey"; the manual scale hands out its colours to the
# sorted tags, so "grey" gets the dark fill and "red" the Netflix red.
n_graph <- ggplot(
  data = n,
  mapping = aes(
    x = Year,
    y = total,
    fill = ifelse(total == max(total), "red", "grey")
  )
) +
  geom_col() +
  scale_fill_manual(values = c("#2d2d2d", "#E50914")) +
  labs(title = "Netflix Movies released each year") +
  theme_minimal() +
  theme(
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
n_graph

Number of Movies Released Each Month
# Number of films per release month.
n1 <- netflix %>%
  group_by(Month) %>%
  tally(name = "total")

# Bar chart of films per month; the busiest month is highlighted in red
# ("grey" and "red" tags are coloured in sorted order by the manual scale).
n1_graph <- ggplot(
  data = n1,
  mapping = aes(
    x = Month,
    y = total,
    fill = ifelse(total == max(total), "red", "grey")
  )
) +
  geom_col() +
  scale_fill_manual(values = c("#2d2d2d", "#E50914")) +
  labs(title = "Netflix Movies Released Each Month") +
  theme_minimal() +
  theme(
    text = element_text(size = 20),
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
n1_graph

Number of Movies Released on Each Day of the Month
# Number of films per day of the month (Date is the day-of-month number).
n2 <- netflix %>% count(Date, name = "total")

# Bar chart of films per day of the month; the most common day is
# highlighted in red, all other days use the dark fill.
n2_graph <- ggplot(
  data = n2,
  mapping = aes(
    x = Date,
    y = total,
    fill = ifelse(total == max(total), "red", "grey")
  )
) +
  geom_col() +
  scale_fill_manual(values = c("#2d2d2d", "#E50914")) +
  labs(title = "Netflix Movies released by date of each month") +
  theme_minimal() +
  theme(
    text = element_text(size = 20),
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
n2_graph

Number of Movies Released Each Day of Week
# Number of films per weekday of release.
n3 <- netflix %>%
  group_by(Day) %>%
  tally(name = "total")

# Bar chart of films per weekday; the busiest weekday is tagged "red" and the
# rest "black", which the manual scale colours in sorted order
# ("black" -> dark fill, "red" -> Netflix red).
n3_graph <- ggplot(
  data = n3,
  mapping = aes(
    x = Day,
    y = total,
    fill = ifelse(total == max(total), "red", "black")
  )
) +
  geom_col() +
  scale_fill_manual(values = c("#2d2d2d", "#E50914")) +
  labs(title = "Netflix Movies released by day of the week") +
  theme_minimal() +
  theme(
    text = element_text(size = 20),
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
n3_graph

Most Popular Genres
# Five genres with the most films, largest first.
n4 <- netflix %>%
  count(Genre, name = "Movies") %>%
  arrange(desc(Movies)) %>%
  head(n = 5)

# Bar chart of the top genres, bars ordered from most to fewest films; the
# leading genre is highlighted in red.
n4_graph <- ggplot(
  data = n4,
  mapping = aes(
    x = reorder(Genre, -Movies),
    y = Movies,
    fill = ifelse(Movies == max(Movies), "red", "black")
  )
) +
  geom_col() +
  scale_fill_manual(values = c("#2d2d2d", "#E50914")) +
  labs(title = "Most Popular Genres") +
  theme_minimal() +
  theme(
    text = element_text(size = 20),
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
n4_graph

Most Popular Languages
# Five languages with the most films, largest first.
n5 <- netflix %>%
  group_by(Language) %>%
  summarise(Movies = n()) %>%
  arrange(desc(Movies)) %>%
  head(5)

# Bar chart of the top languages, bars ordered from most to fewest films; the
# leading language is tagged "red" and the rest "black", which the manual
# scale colours in sorted order ("black" -> dark fill, "red" -> Netflix red).
n5_graph <-
  ggplot(data = n5) +
  geom_col(mapping = aes(
    x = reorder(Language, -Movies),
    y = Movies,
    fill = ifelse(Movies == max(Movies), "red", "black")
  )) +
  labs(title = "Most Popular Languages") +
  theme_minimal() +
  scale_fill_manual(values = c("#2d2d2d", "#E50914")) +
  theme(
    legend.position = "none",
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    # `text` (not `title`) is the element that scales the axis labels; `title`
    # only reaches title elements, all of which are blank, hidden or
    # overridden in this theme.
    text = element_text(size = 20)
  )
n5_graph

IMDB Score Distribution
# Dot plot of the IMDB score distribution, binned in steps of 0.3.
n6_graph <- ggplot(data = netflix, mapping = aes(x = `IMDB Score`)) +
  geom_dotplot(
    binwidth = 0.3,
    fill = "#2d2d2d",
    color = "#e9ecef"
  ) +
  labs(title = "IMDB Score Distribution") +
  theme_minimal() +
  theme(
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )
n6_graph

Highest Rated Movies
# The five films with the highest IMDB scores.
n7 <- head(arrange(netflix, desc(`IMDB Score`)), 5)

# Horizontal bar chart of those films, ordered by score; the top-scoring
# film is highlighted in red.
n7_graph <- ggplot(
  data = n7,
  mapping = aes(
    x = reorder(Title, `IMDB Score`),
    y = `IMDB Score`,
    fill = ifelse(`IMDB Score` == max(`IMDB Score`), "red", "black")
  )
) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c("#2d2d2d", "#E50914")) +
  labs(title = "Highest Rated Movies") +
  theme_minimal() +
  theme(
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )
n7_graph

Lowest Rated Movies
# The five films with the lowest IMDB scores. arrange() sorts ascending by
# default, so the lowest scores come first.
n8 <- netflix %>%
  arrange(`IMDB Score`) %>%
  head(5)

# Horizontal bar chart of those films. Ordering titles by the negated score
# makes the lowest-rated film the last factor level, so after coord_flip() it
# is drawn at the top; that film is highlighted in red.
n8_graph <- ggplot(data = n8) +
  geom_col(mapping = aes(
    x = reorder(`Title`, -`IMDB Score`),
    y = `IMDB Score`,
    fill = ifelse(`IMDB Score` == min(`IMDB Score`), "red", "black")
  )) +
  labs(title = "Lowest Rated Movies") +
  theme_minimal() +
  scale_fill_manual(values = c("#2d2d2d", "#E50914")) +
  coord_flip() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank()
  )
n8_graph

Movie Runtime
# Dot plot of the runtime distribution, binned in steps of 2.25.
n9_graph <- ggplot(data = netflix, mapping = aes(x = Runtime)) +
  geom_dotplot(
    binwidth = 2.25,
    fill = "#2d2d2d",
    color = "#e9ecef"
  ) +
  labs(title = "Movie Runtime") +
  theme_minimal() +
  theme(
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )
n9_graph

Longest Movies
# The five films with the longest runtime.
n10 <- head(arrange(netflix, desc(Runtime)), 5)

# Horizontal bar chart of those films, ordered by runtime; the longest film
# is highlighted in red.
n10_graph <- ggplot(
  data = n10,
  mapping = aes(
    x = reorder(Title, Runtime),
    y = Runtime,
    fill = ifelse(Runtime == max(Runtime), "red", "black")
  )
) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c("#2d2d2d", "#E50914")) +
  labs(title = "Longest Movies") +
  theme_minimal() +
  theme(
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )
n10_graph

Shortest Movies
# The five films with the shortest runtime. arrange() sorts ascending by
# default, so the shortest films come first.
n11 <- netflix %>%
  arrange(Runtime) %>%
  head(5)

# Horizontal bar chart of those films. Ordering titles by the negated runtime
# makes the shortest film the last factor level, so after coord_flip() the
# highlighted (red) bar is drawn at the top, as in the other ranking charts.
n11_graph <- ggplot(data = n11) +
  geom_col(mapping = aes(
    x = reorder(`Title`, -`Runtime`),
    y = `Runtime`,
    fill = ifelse(Runtime == min(`Runtime`), "red", "black")
  )) +
  labs(title = "Shortest Movies") +
  theme_minimal() +
  scale_fill_manual(values = c("#2d2d2d", "#E50914")) +
  coord_flip() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank(),
    text = element_text(size = 20)
  )
n11_graph

Runtime vs IMDB Rating
# Scatter plot of runtime (y axis) against IMDB score (x axis) with a
# linear-model trend line in Netflix red. No layer maps a fill aesthetic, so
# no fill scale is attached.
n12_graph <- ggplot(data = netflix, aes(x = `IMDB Score`, y = Runtime)) +
  geom_point() +
  geom_smooth(method = "lm", color = "#E50914") +
  labs(title = "Runtime vs IMDB Rating") +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      family = "Bebas Neue",
      size = 25,
      color = "#E50914"
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major.x = element_blank()
  )
n12_graph
## `geom_smooth()` using formula = 'y ~ x'
