# Load Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Load Netflix Dataset
# The CSV location can be overridden through the NETFLIX_TITLES_CSV
# environment variable; when it is unset the original path is used.
netflix_csv <- Sys.getenv(
  "NETFLIX_TITLES_CSV",
  unset = "C:/Users/adars/Downloads/archive (6)/netflix_titles.csv"
)
# Fail early with a clear message instead of read.csv()'s connection error.
if (!file.exists(netflix_csv)) {
  stop("Netflix CSV not found: ", netflix_csv, call. = FALSE)
}
netflix_data <- read.csv(netflix_csv)
# View() opens a GUI data viewer, so only call it in an interactive session.
if (interactive()) {
  View(netflix_data)
}
# ---------------------------------------------------
# 1 Structure of Dataset
# ---------------------------------------------------
# Compact overview: one line per column with its type and leading values.
utils::str(object = netflix_data)
## 'data.frame': 8807 obs. of 12 variables:
## $ show_id : chr "s1" "s2" "s3" "s4" ...
## $ type : chr "Movie" "TV Show" "TV Show" "TV Show" ...
## $ title : chr "Dick Johnson Is Dead" "Blood & Water" "Ganglands" "Jailbirds New Orleans" ...
## $ director : chr "Kirsten Johnson" "" "Julien Leclercq" "" ...
## $ cast : chr "" "Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile "| __truncated__ "Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabiha Akkari, Sofia Lesaffre, Salim Kechiouche, Noureddine Farihi, G"| __truncated__ "" ...
## $ country : chr "United States" "South Africa" "" "" ...
## $ date_added : chr "September 25, 2021" "September 24, 2021" "September 24, 2021" "September 24, 2021" ...
## $ release_year: int 2020 2021 2021 2021 2021 2021 2021 1993 2021 2021 ...
## $ rating : chr "PG-13" "TV-MA" "TV-MA" "TV-MA" ...
## $ duration : chr "90 min" "2 Seasons" "1 Season" "1 Season" ...
## $ listed_in : chr "Documentaries" "International TV Shows, TV Dramas, TV Mysteries" "Crime TV Shows, International TV Shows, TV Action & Adventure" "Docuseries, Reality TV" ...
## $ description : chr "As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical wa"| __truncated__ "After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is h"| __truncated__ "To protect his family from a powerful drug lord, skilled thief Mehdi and his expert team of robbers are pulled "| __truncated__ "Feuds, flirtations and toilet talk go down among the incarcerated women at the Orleans Justice Center in New Or"| __truncated__ ...
# Column names of the data frame.
colnames(netflix_data)
## [1] "show_id" "type" "title" "director" "cast"
## [6] "country" "date_added" "release_year" "rating" "duration"
## [11] "listed_in" "description"
# First six rows.
head(netflix_data, n = 6L)
## show_id type title director
## 1 s1 Movie Dick Johnson Is Dead Kirsten Johnson
## 2 s2 TV Show Blood & Water
## 3 s3 TV Show Ganglands Julien Leclercq
## 4 s4 TV Show Jailbirds New Orleans
## 5 s5 TV Show Kota Factory
## 6 s6 TV Show Midnight Mass Mike Flanagan
## cast
## 1
## 2 Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile Tshabalala, Getmore Sithole, Cindy Mahlangu, Ryle De Morny, Greteli Fincham, Sello Maake Ka-Ncube, Odwa Gwanya, Mekaila Mathys, Sandi Schultz, Duane Williams, Shamilla Miller, Patrick Mofokeng
## 3 Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabiha Akkari, Sofia Lesaffre, Salim Kechiouche, Noureddine Farihi, Geert Van Rampelberg, Bakary Diombera
## 4
## 5 Mayur More, Jitendra Kumar, Ranjan Raj, Alam Khan, Ahsaas Channa, Revathi Pillai, Urvi Singh, Arun Kumar
## 6 Kate Siegel, Zach Gilford, Hamish Linklater, Henry Thomas, Kristin Lehman, Samantha Sloyan, Igby Rigney, Rahul Kohli, Annarah Cymone, Annabeth Gish, Alex Essoe, Rahul Abburi, Matt Biedel, Michael Trucco, Crystal Balint, Louis Oliver
## country date_added release_year rating duration
## 1 United States September 25, 2021 2020 PG-13 90 min
## 2 South Africa September 24, 2021 2021 TV-MA 2 Seasons
## 3 September 24, 2021 2021 TV-MA 1 Season
## 4 September 24, 2021 2021 TV-MA 1 Season
## 5 India September 24, 2021 2021 TV-MA 2 Seasons
## 6 September 24, 2021 2021 TV-MA 1 Season
## listed_in
## 1 Documentaries
## 2 International TV Shows, TV Dramas, TV Mysteries
## 3 Crime TV Shows, International TV Shows, TV Action & Adventure
## 4 Docuseries, Reality TV
## 5 International TV Shows, Romantic TV Shows, TV Comedies
## 6 TV Dramas, TV Horror, TV Mysteries
## description
## 1 As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable.
## 2 After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is her sister who was abducted at birth.
## 3 To protect his family from a powerful drug lord, skilled thief Mehdi and his expert team of robbers are pulled into a violent and deadly turf war.
## 4 Feuds, flirtations and toilet talk go down among the incarcerated women at the Orleans Justice Center in New Orleans on this gritty reality series.
## 5 In a city of coaching centers known to train India’s finest collegiate minds, an earnest but unexceptional student and his friends navigate campus life.
## 6 The arrival of a charismatic young priest brings glorious miracles, ominous mysteries and renewed religious fervor to a dying town desperate to believe.
# Last six rows.
tail(netflix_data, n = 6L)
## show_id type title director
## 8802 s8802 Movie Zinzana Majid Al Ansari
## 8803 s8803 Movie Zodiac David Fincher
## 8804 s8804 TV Show Zombie Dumb
## 8805 s8805 Movie Zombieland Ruben Fleischer
## 8806 s8806 Movie Zoom Peter Hewitt
## 8807 s8807 Movie Zubaan Mozez Singh
## cast
## 8802 Ali Suliman, Saleh Bakri, Yasa, Ali Al-Jabri, Mansoor Alfeeli, Ahd
## 8803 Mark Ruffalo, Jake Gyllenhaal, Robert Downey Jr., Anthony Edwards, Brian Cox, Elias Koteas, Donal Logue, John Carroll Lynch, Dermot Mulroney, Chloë Sevigny
## 8804
## 8805 Jesse Eisenberg, Woody Harrelson, Emma Stone, Abigail Breslin, Amber Heard, Bill Murray, Derek Graf
## 8806 Tim Allen, Courteney Cox, Chevy Chase, Kate Mara, Ryan Newman, Michael Cassidy, Spencer Breslin, Rip Torn, Kevin Zegers
## 8807 Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana, Manish Chaudhary, Meghna Malik, Malkeet Rauni, Anita Shabdish, Chittaranjan Tripathy
## country date_added release_year rating
## 8802 United Arab Emirates, Jordan March 9, 2016 2015 TV-MA
## 8803 United States November 20, 2019 2007 R
## 8804 July 1, 2019 2018 TV-Y7
## 8805 United States November 1, 2019 2009 R
## 8806 United States January 11, 2020 2006 PG
## 8807 India March 2, 2019 2015 TV-14
## duration listed_in
## 8802 96 min Dramas, International Movies, Thrillers
## 8803 158 min Cult Movies, Dramas, Thrillers
## 8804 2 Seasons Kids' TV, Korean TV Shows, TV Comedies
## 8805 88 min Comedies, Horror Movies
## 8806 88 min Children & Family Movies, Comedies
## 8807 111 min Dramas, International Movies, Music & Musicals
## description
## 8802 Recovering alcoholic Talal wakes up inside a small-town police station cell, where he's subject to the mind games of a psychotic sadist.
## 8803 A political cartoonist, a crime reporter and a pair of cops investigate San Francisco's infamous Zodiac Killer in this thriller based on a true story.
## 8804 While living alone in a spooky town, a young girl befriends a motley crew of zombie children with diverse personalities.
## 8805 Looking to survive in a world taken over by zombies, a dorky college student teams with an urban roughneck and a pair of grifter sisters.
## 8806 Dragged from civilian life, a former superhero must train a new crop of youthful saviors when the military preps for an attack by a familiar villain.
## 8807 A scrappy but poor boy worms his way into a tycoon's dysfunctional family, while facing his fear of music and the truth about his past.
# Per-column summary statistics.
summary(object = netflix_data)
## show_id type title director
## Length :8807 Length :8807 Length :8807 Length :8807
## N.unique :8807 N.unique : 2 N.unique :8807 N.unique :4529
## N.blank : 0 N.blank : 0 N.blank : 0 N.blank :2634
## Min.nchar: 2 Min.nchar: 5 Min.nchar: 1 Min.nchar: 0
## Max.nchar: 5 Max.nchar: 7 Max.nchar: 104 Max.nchar: 208
##
## cast country date_added release_year
## Length :8807 Length :8807 Length :8807 Min. :1925
## N.unique :7693 N.unique : 749 N.unique :1768 1st Qu.:2013
## N.blank : 825 N.blank : 831 N.blank : 10 Median :2017
## Min.nchar: 0 Min.nchar: 0 Min.nchar: 0 Mean :2014
## Max.nchar: 771 Max.nchar: 123 Max.nchar: 19 3rd Qu.:2019
## Max. :2021
## rating duration listed_in description
## Length :8807 Length :8807 Length :8807 Length :8807
## N.unique : 18 N.unique : 221 N.unique : 514 N.unique :8775
## N.blank : 4 N.blank : 3 N.blank : 0 N.blank : 0
## Min.nchar: 0 Min.nchar: 0 Min.nchar: 6 Min.nchar: 61
## Max.nchar: 8 Max.nchar: 10 Max.nchar: 79 Max.nchar: 248
##
# Number of rows, then number of columns.
c(nrow(netflix_data), ncol(netflix_data))
## [1] 8807 12
# ---------------------------------------------------
# 2 Convert Categorical Variables
# ---------------------------------------------------
# Turn the three categorical text columns into factors in a single step.
netflix_data <- netflix_data %>%
  mutate(across(c(type, rating, country), as.factor))
# ---------------------------------------------------
# 3 Missing Values
# ---------------------------------------------------
# Empty CSV fields in text columns were loaded as "" rather than NA, so
# is.na() alone reports zero missing values for every column. Count blank
# strings as missing as well.
vapply(
  netflix_data,
  function(column) sum(is.na(column) | as.character(column) == ""),
  integer(1)
)
## show_id type title director cast country
## 0 0 0 2634 825 831
## date_added release_year rating duration listed_in description
## 10 0 4 3 0 0
# ---------------------------------------------------
# 4 Filter Movies Released After 2018
# ---------------------------------------------------
# Keep only rows whose type is "Movie"; without the type condition the
# result also contained TV shows despite the section title and variable name.
latest_movies <- netflix_data %>%
  filter(type == "Movie", release_year > 2018) %>%
  select(title, type, release_year)
# NOTE: the captured output that follows was produced before the type
# condition was added (it still lists TV shows); re-run to refresh it.
head(latest_movies)
## title type release_year
## 1 Dick Johnson Is Dead Movie 2020
## 2 Blood & Water TV Show 2021
## 3 Ganglands TV Show 2021
## 4 Jailbirds New Orleans TV Show 2021
## 5 Kota Factory TV Show 2021
## 6 Midnight Mass TV Show 2021
# ---------------------------------------------------
# 5 Top 10 Latest Releases
# ---------------------------------------------------
# Sort newest release year first, then keep the first ten rows.
top_latest <- netflix_data %>%
  arrange(desc(release_year)) %>%
  slice_head(n = 10)
top_latest
## show_id type title
## 1 s2 TV Show Blood & Water
## 2 s3 TV Show Ganglands
## 3 s4 TV Show Jailbirds New Orleans
## 4 s5 TV Show Kota Factory
## 5 s6 TV Show Midnight Mass
## 6 s7 Movie My Little Pony: A New Generation
## 7 s9 TV Show The Great British Baking Show
## 8 s10 Movie The Starling
## 9 s11 TV Show Vendetta: Truth, Lies and The Mafia
## 10 s12 TV Show Bangkok Breaking
## director
## 1
## 2 Julien Leclercq
## 3
## 4
## 5 Mike Flanagan
## 6 Robert Cullen, José Luis Ucha
## 7 Andy Devonshire
## 8 Theodore Melfi
## 9
## 10 Kongkiat Komesiri
## cast
## 1 Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile Tshabalala, Getmore Sithole, Cindy Mahlangu, Ryle De Morny, Greteli Fincham, Sello Maake Ka-Ncube, Odwa Gwanya, Mekaila Mathys, Sandi Schultz, Duane Williams, Shamilla Miller, Patrick Mofokeng
## 2 Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabiha Akkari, Sofia Lesaffre, Salim Kechiouche, Noureddine Farihi, Geert Van Rampelberg, Bakary Diombera
## 3
## 4 Mayur More, Jitendra Kumar, Ranjan Raj, Alam Khan, Ahsaas Channa, Revathi Pillai, Urvi Singh, Arun Kumar
## 5 Kate Siegel, Zach Gilford, Hamish Linklater, Henry Thomas, Kristin Lehman, Samantha Sloyan, Igby Rigney, Rahul Kohli, Annarah Cymone, Annabeth Gish, Alex Essoe, Rahul Abburi, Matt Biedel, Michael Trucco, Crystal Balint, Louis Oliver
## 6 Vanessa Hudgens, Kimiko Glenn, James Marsden, Sofia Carson, Liza Koshy, Ken Jeong, Elizabeth Perkins, Jane Krakowski, Michael McKean, Phil LaMarr
## 7 Mel Giedroyc, Sue Perkins, Mary Berry, Paul Hollywood
## 8 Melissa McCarthy, Chris O'Dowd, Kevin Kline, Timothy Olyphant, Daveed Diggs, Skyler Gisondo, Laura Harrier, Rosalind Chao, Kimberly Quinn, Loretta Devine, Ravi Kapoor
## 9
## 10 Sukollawat Kanarot, Sushar Manaying, Pavarit Mongkolpisit, Sahajak Boonthanakit, Suthipongse Thatphithakkul, Bhasaworn Bawronkirati, Daweerit Chullasapya, Waratthaya Wongchayaporn, Kittiphoom Wongpentak, Abhicha Thanachanun, Nophand Boonyai, Kittipong Khamsat, Arisara Wongchalee, Jaytiya Naiwattanakul, Pantipa Arunwattanachai, Panupan Jantanawong, Kungtap Saelim, Phumphat Chartsuriyakiat, Issara Veranitinunt, Keerati Sivakuae, Panjai Sirisuwan, Supranee Charoenpol, Suda Chuenban, Visaka Banhansupavat, Pitchatorn Santinatornkul
## country date_added release_year rating duration
## 1 South Africa September 24, 2021 2021 TV-MA 2 Seasons
## 2 September 24, 2021 2021 TV-MA 1 Season
## 3 September 24, 2021 2021 TV-MA 1 Season
## 4 India September 24, 2021 2021 TV-MA 2 Seasons
## 5 September 24, 2021 2021 TV-MA 1 Season
## 6 September 24, 2021 2021 PG 91 min
## 7 United Kingdom September 24, 2021 2021 TV-14 9 Seasons
## 8 United States September 24, 2021 2021 PG-13 104 min
## 9 September 24, 2021 2021 TV-MA 1 Season
## 10 September 23, 2021 2021 TV-MA 1 Season
## listed_in
## 1 International TV Shows, TV Dramas, TV Mysteries
## 2 Crime TV Shows, International TV Shows, TV Action & Adventure
## 3 Docuseries, Reality TV
## 4 International TV Shows, Romantic TV Shows, TV Comedies
## 5 TV Dramas, TV Horror, TV Mysteries
## 6 Children & Family Movies
## 7 British TV Shows, Reality TV
## 8 Comedies, Dramas
## 9 Crime TV Shows, Docuseries, International TV Shows
## 10 Crime TV Shows, International TV Shows, TV Action & Adventure
## description
## 1 After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is her sister who was abducted at birth.
## 2 To protect his family from a powerful drug lord, skilled thief Mehdi and his expert team of robbers are pulled into a violent and deadly turf war.
## 3 Feuds, flirtations and toilet talk go down among the incarcerated women at the Orleans Justice Center in New Orleans on this gritty reality series.
## 4 In a city of coaching centers known to train India’s finest collegiate minds, an earnest but unexceptional student and his friends navigate campus life.
## 5 The arrival of a charismatic young priest brings glorious miracles, ominous mysteries and renewed religious fervor to a dying town desperate to believe.
## 6 Equestria's divided. But a bright-eyed hero believes Earth Ponies, Pegasi and Unicorns should be pals — and, hoof to heart, she’s determined to prove it.
## 7 A talented batch of amateur bakers face off in a 10-week competition, whipping up their best dishes in the hopes of being named the U.K.'s best.
## 8 A woman adjusting to life after a loss contends with a feisty bird that's taken over her garden — and a husband who's struggling to find a way forward.
## 9 Sicily boasts a bold "Anti-Mafia" coalition. But what happens when those trying to bring down organized crime are accused of being criminals themselves?
## 10 Struggling to earn a living in Bangkok, a man joins an emergency rescue service and realizes he must unravel a citywide conspiracy.
# ---------------------------------------------------
# 6 Ranking Shows by Release Year
# ---------------------------------------------------
# Order by release year (newest first) and number the rows 1..n in that
# order; titles sharing a year therefore get consecutive, distinct ranks.
rank_show <- arrange(netflix_data, desc(release_year))
rank_show$rank <- seq_len(nrow(rank_show))
head(rank_show)
## show_id type title
## 1 s2 TV Show Blood & Water
## 2 s3 TV Show Ganglands
## 3 s4 TV Show Jailbirds New Orleans
## 4 s5 TV Show Kota Factory
## 5 s6 TV Show Midnight Mass
## 6 s7 Movie My Little Pony: A New Generation
## director
## 1
## 2 Julien Leclercq
## 3
## 4
## 5 Mike Flanagan
## 6 Robert Cullen, José Luis Ucha
## cast
## 1 Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile Tshabalala, Getmore Sithole, Cindy Mahlangu, Ryle De Morny, Greteli Fincham, Sello Maake Ka-Ncube, Odwa Gwanya, Mekaila Mathys, Sandi Schultz, Duane Williams, Shamilla Miller, Patrick Mofokeng
## 2 Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabiha Akkari, Sofia Lesaffre, Salim Kechiouche, Noureddine Farihi, Geert Van Rampelberg, Bakary Diombera
## 3
## 4 Mayur More, Jitendra Kumar, Ranjan Raj, Alam Khan, Ahsaas Channa, Revathi Pillai, Urvi Singh, Arun Kumar
## 5 Kate Siegel, Zach Gilford, Hamish Linklater, Henry Thomas, Kristin Lehman, Samantha Sloyan, Igby Rigney, Rahul Kohli, Annarah Cymone, Annabeth Gish, Alex Essoe, Rahul Abburi, Matt Biedel, Michael Trucco, Crystal Balint, Louis Oliver
## 6 Vanessa Hudgens, Kimiko Glenn, James Marsden, Sofia Carson, Liza Koshy, Ken Jeong, Elizabeth Perkins, Jane Krakowski, Michael McKean, Phil LaMarr
## country date_added release_year rating duration
## 1 South Africa September 24, 2021 2021 TV-MA 2 Seasons
## 2 September 24, 2021 2021 TV-MA 1 Season
## 3 September 24, 2021 2021 TV-MA 1 Season
## 4 India September 24, 2021 2021 TV-MA 2 Seasons
## 5 September 24, 2021 2021 TV-MA 1 Season
## 6 September 24, 2021 2021 PG 91 min
## listed_in
## 1 International TV Shows, TV Dramas, TV Mysteries
## 2 Crime TV Shows, International TV Shows, TV Action & Adventure
## 3 Docuseries, Reality TV
## 4 International TV Shows, Romantic TV Shows, TV Comedies
## 5 TV Dramas, TV Horror, TV Mysteries
## 6 Children & Family Movies
## description
## 1 After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is her sister who was abducted at birth.
## 2 To protect his family from a powerful drug lord, skilled thief Mehdi and his expert team of robbers are pulled into a violent and deadly turf war.
## 3 Feuds, flirtations and toilet talk go down among the incarcerated women at the Orleans Justice Center in New Orleans on this gritty reality series.
## 4 In a city of coaching centers known to train India’s finest collegiate minds, an earnest but unexceptional student and his friends navigate campus life.
## 5 The arrival of a charismatic young priest brings glorious miracles, ominous mysteries and renewed religious fervor to a dying town desperate to believe.
## 6 Equestria's divided. But a bright-eyed hero believes Earth Ponies, Pegasi and Unicorns should be pals — and, hoof to heart, she’s determined to prove it.
## rank
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
# ---------------------------------------------------
# 7 Count of Movies and TV Shows
# ---------------------------------------------------
# One bar per content type, coloured by type.
netflix_data %>%
  ggplot(aes(x = type, fill = type)) +
  geom_bar() +
  ggtitle("Movies vs TV Shows") +
  xlab("Type") +
  ylab("Count")
# ---------------------------------------------------
# 8 Release Year Distribution
# ---------------------------------------------------
# Histogram of release years in two-year bins.
ggplot(data = netflix_data, mapping = aes(x = release_year)) +
  geom_histogram(binwidth = 2, fill = "red", colour = "black") +
  labs(title = "Release Year Distribution", x = "Release Year", y = "Count")
# ---------------------------------------------------
# 9 Rating Distribution
# ---------------------------------------------------
# Bar chart of title counts per content rating.
netflix_data %>%
  ggplot(aes(x = rating, fill = rating)) +
  geom_bar() +
  labs(title = "Content Rating Distribution", x = "Rating", y = "Count")
# ---------------------------------------------------
# 10 Movies vs Release Year
# ---------------------------------------------------
# Stacked histogram of release years (three-year bins), split by type.
ggplot(data = netflix_data, mapping = aes(x = release_year, fill = type)) +
  geom_histogram(binwidth = 3) +
  ggtitle("Movies and TV Shows by Release Year") +
  xlab("Release Year") +
  ylab("Count")
# ---------------------------------------------------
# 11 Top Countries Producing Content
# ---------------------------------------------------
# Rows with no country recorded (blank string) are dropped first; otherwise
# the blank value is counted as if it were a country and lands in the top 10.
# NOTE(review): `country` can hold several comma-separated countries in one
# value (e.g. "United Arab Emirates, Jordan"); those are still counted as
# their own combined category here.
top_country <- netflix_data %>%
  filter(!is.na(country), country != "") %>%
  group_by(country) %>%
  summarise(total = n()) %>%
  arrange(desc(total)) %>%
  head(10)
# NOTE: the captured output that follows predates the blank-country filter
# (its row 3 is the blank value); re-run to refresh it.
top_country
## # A tibble: 10 × 2
## country total
## <fct> <int>
## 1 "United States" 2818
## 2 "India" 972
## 3 "" 831
## 4 "United Kingdom" 419
## 5 "Japan" 245
## 6 "South Korea" 199
## 7 "Canada" 181
## 8 "Spain" 145
## 9 "France" 124
## 10 "Mexico" 110
# Horizontal bar chart of the top-country table, ordered by count.
top_country %>%
  ggplot(aes(x = reorder(country, total), y = total)) +
  geom_col(fill = "blue") +
  coord_flip() +
  ggtitle("Top Countries Producing Netflix Content") +
  xlab("Country") +
  ylab("Count")
# ---------------------------------------------------
# 12 TV Shows vs Movies Boxplot
# ---------------------------------------------------
# Compare the spread of release years between the two content types.
ggplot(
  data = netflix_data,
  mapping = aes(x = type, y = release_year, fill = type)
) +
  geom_boxplot() +
  ggtitle("Release Year by Type") +
  xlab("Type") +
  ylab("Release Year")
# ---------------------------------------------------
# 13 CDF of Release Year
# ---------------------------------------------------
# Empirical cumulative distribution of release years (base graphics).
plot(
  ecdf(netflix_data[["release_year"]]),
  col = "blue",
  main = "CDF of Release Year",
  xlab = "Release Year",
  ylab = "Cumulative Probability"
)
# ---------------------------------------------------
# 14 Density of Release Years
# ---------------------------------------------------
# Kernel density estimate of release_year. This plots when titles were
# released, not when they were added to the catalogue (date_added is unused).
netflix_data %>%
  ggplot(aes(x = release_year)) +
  geom_density(fill = "lightblue") +
  ggtitle("Density Plot of Release Years") +
  xlab("Release Year") +
  ylab("Density")
# ---------------------------------------------------
# 15 Create Release Groups
# ---------------------------------------------------
# Bucket titles by release year: up to 2000 = "Old", 2001-2010 = "Middle",
# 2011 onwards = "New". The outer breaks are open-ended because the data
# goes back to 1925; with a lower break of 1980 every title released in
# 1980 or earlier fell outside the intervals and became NA.
netflix_data$release_group <- cut(
  netflix_data$release_year,
  breaks = c(-Inf, 2000, 2010, Inf),
  labels = c("Old", "Middle", "New")
)
# Stacked bar chart of the groups, split by content type.
ggplot(netflix_data,
       aes(x = release_group,
           fill = type)) +
  geom_bar() +
  labs(
    title = "Release Groups",
    x = "Group",
    y = "Count"
  )
# ---------------------------------------------------
# 16 ANOVA for Release Year and Type
# ---------------------------------------------------
# One-way ANOVA: does mean release year differ between content types?
anova_type <- aov(formula = release_year ~ type, data = netflix_data)
summary(anova_type)
## Df Sum Sq Mean Sq F value Pr(>F)
## type 1 22615 22615 300.7 <2e-16 ***
## Residuals 8805 662318 75
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ---------------------------------------------------
# 17 Correlation between Release Year and Duration
# ---------------------------------------------------
# Strip every non-digit character from `duration` and parse what is left as
# a number; blank durations become NA.
# NOTE(review): `duration` holds minutes for some rows ("90 min") and season
# counts for others ("2 Seasons"), so duration_num mixes two units. Results
# built on it (correlation, regressions) should be read with that in mind.
netflix_data[["duration_num"]] <- as.numeric(
  gsub("[^0-9]", "", netflix_data[["duration"]])
)
# Pearson correlation over rows where both values are present.
with(
  netflix_data,
  cor(release_year, duration_num, use = "complete.obs")
)
## [1] -0.2491815
# ---------------------------------------------------
# 18 Single Regression
# ---------------------------------------------------
# Simple linear model: numeric duration explained by release year.
single_reg <- lm(formula = duration_num ~ release_year, data = netflix_data)
summary(single_reg)
##
## Call:
## lm(formula = duration_num ~ release_year, data = netflix_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -196.87 -59.49 16.91 36.77 247.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2961.22063 119.77939 24.72 <2e-16 ***
## release_year -1.43551 0.05947 -24.14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 49.21 on 8802 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.06209, Adjusted R-squared: 0.06198
## F-statistic: 582.7 on 1 and 8802 DF, p-value: < 2.2e-16
# ---------------------------------------------------
# 19 Regression Plot
# ---------------------------------------------------
# Scatter of duration against release year with the fitted straight line.
# (Restored from a chunk that still carried markdown fences and typographic
# quotes, neither of which is valid R.)
ggplot(netflix_data,
       aes(x = release_year,
           y = duration_num)) +
  geom_point(color = "purple") +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "red"
  ) +
  labs(
    title = "Release Year vs Duration",
    x = "Release Year",
    y = "Duration"
  )
# ---------------------------------------------------
# 20 Multiple Regression
# ---------------------------------------------------
# Duration explained by release year AND content type. The earlier formula
# used release_year alone, which made this model identical to single_reg.
multiple_reg <- lm(
  duration_num ~ release_year + type,
  data = netflix_data
)
# NOTE: the captured output that follows was produced by the one-predictor
# formula; re-run to refresh it.
summary(multiple_reg)
##
## Call:
## lm(formula = duration_num ~ release_year, data = netflix_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -196.87 -59.49 16.91 36.77 247.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2961.22063 119.77939 24.72 <2e-16 ***
## release_year -1.43551 0.05947 -24.14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 49.21 on 8802 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.06209, Adjusted R-squared: 0.06198
## F-statistic: 582.7 on 1 and 8802 DF, p-value: < 2.2e-16
# ---------------------------------------------------
# 21 Polynomial Regression Degree 2
# ---------------------------------------------------
# Quadratic fit of duration on release year using poly() terms.
poly_reg <- lm(
  formula = duration_num ~ poly(release_year, 2),
  data = netflix_data
)
summary(poly_reg)
##
## Call:
## lm(formula = duration_num ~ poly(release_year, 2), data = netflix_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -115.86 -54.97 13.97 37.03 249.83
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 69.8480 0.5173 135.02 <2e-16 ***
## poly(release_year, 2)1 -1188.0764 48.5401 -24.48 <2e-16 ***
## poly(release_year, 2)2 -764.0140 48.5419 -15.74 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.54 on 8801 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.08777, Adjusted R-squared: 0.08756
## F-statistic: 423.4 on 2 and 8801 DF, p-value: < 2.2e-16
# ---------------------------------------------------
# 22 Polynomial Regression Plot
# ---------------------------------------------------
# Scatter of duration against release year with a degree-2 polynomial fit.
netflix_data %>%
  ggplot(aes(x = release_year, y = duration_num)) +
  geom_point(colour = "darkgreen") +
  geom_smooth(
    method = "lm",
    formula = y ~ poly(x, 2),
    se = FALSE,
    colour = "blue"
  ) +
  ggtitle("Polynomial Regression") +
  xlab("Release Year") +
  ylab("Duration")
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
# ---------------------------------------------------
# 23 QQ Plot
# ---------------------------------------------------
# Normal Q-Q plot of duration_num with a red reference line.
with(netflix_data, {
  qqnorm(duration_num)
  qqline(duration_num, col = "red")
})
# ---------------------------------------------------
# 24 Residual Plot
# ---------------------------------------------------
# Residuals against fitted values for single_reg, with a zero reference line.
with(
  single_reg,
  plot(
    fitted.values,
    residuals,
    col = "blue",
    main = "Residual Plot",
    xlab = "Fitted Values",
    ylab = "Residuals"
  )
)
abline(h = 0, col = "red")
# ---------------------------------------------------
# 25 Scatter Plot Matrix
# ---------------------------------------------------
# Pairwise scatter plots of the two numeric columns.
pairs(
  netflix_data[c("release_year", "duration_num")],
  main = "Scatter Plot Matrix"
)
# ---------------------------------------------------
# 26 Outlier Detection
# ---------------------------------------------------
# Box plot of duration_num; points beyond the whiskers are drawn individually.
boxplot(
  x = netflix_data[["duration_num"]],
  col = "pink",
  main = "Outlier Detection in Duration"
)
# ---------------------------------------------------
# 27 Top 10 Directors
# ---------------------------------------------------
# Rows with no director recorded (blank string) are dropped first; otherwise
# the blank value is ranked as the most frequent "director".
top_director <- netflix_data %>%
  filter(!is.na(director), director != "") %>%
  group_by(director) %>%
  summarise(total = n()) %>%
  arrange(desc(total)) %>%
  head(10)
# NOTE: the captured output that follows predates the blank-director filter
# (its first row is the blank value); re-run to refresh it.
top_director
## # A tibble: 10 × 2
## director total
## <chr> <int>
## 1 "" 2634
## 2 "Rajiv Chilaka" 19
## 3 "Raúl Campos, Jan Suter" 18
## 4 "Marcus Raboy" 16
## 5 "Suhas Kadav" 16
## 6 "Jay Karas" 14
## 7 "Cathy Garcia-Molina" 13
## 8 "Jay Chapman" 12
## 9 "Martin Scorsese" 12
## 10 "Youssef Chahine" 12
# ---------------------------------------------------
# 28 Top Genres
# ---------------------------------------------------
# Count titles per distinct `listed_in` value and keep the ten most common.
# NOTE(review): `listed_in` is a comma-separated list, so each distinct
# combination of genres is counted as its own category rather than each
# individual genre.
top_genres <- netflix_data %>%
  group_by(listed_in) %>%
  tally(name = "total") %>%
  arrange(desc(total)) %>%
  slice_head(n = 10)
top_genres
## # A tibble: 10 × 2
## listed_in total
## <chr> <int>
## 1 Dramas, International Movies 362
## 2 Documentaries 359
## 3 Stand-Up Comedy 334
## 4 Comedies, Dramas, International Movies 274
## 5 Dramas, Independent Movies, International Movies 252
## 6 Kids' TV 220
## 7 Children & Family Movies 215
## 8 Children & Family Movies, Comedies 201
## 9 Documentaries, International Movies 186
## 10 Dramas, International Movies, Romantic Movies 180
# ---------------------------------------------------
# 29 Content Rating vs Type
# ---------------------------------------------------
# Side-by-side bars: title counts per rating, one bar per content type.
netflix_data %>%
  ggplot(aes(x = rating, fill = type)) +
  geom_bar(position = "dodge") +
  ggtitle("Rating vs Type") +
  xlab("Rating") +
  ylab("Count")
# ---------------------------------------------------
# 30 Final Distribution of Netflix Content
# ---------------------------------------------------
# Stacked bars per content type, filled by release group.
ggplot(data = netflix_data, mapping = aes(x = type, fill = release_group)) +
  geom_bar() +
  ggtitle("Netflix Content Distribution") +
  xlab("Type") +
  ylab("Count")