# Load Libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Load Netflix Dataset
# Location of the Netflix titles CSV. Set the NETFLIX_CSV environment variable
# to read the file from somewhere else; when it is unset the original local
# path is used, so existing behaviour is unchanged.
netflix_csv <- Sys.getenv(
  "NETFLIX_CSV",
  unset = "C:/Users/adars/Downloads/archive (6)/netflix_titles.csv"
)

# Fail early with a message that names the path that was tried.
if (!file.exists(netflix_csv)) {
  stop("Netflix CSV not found: ", netflix_csv, call. = FALSE)
}

netflix_data <- read.csv(netflix_csv)

# View() opens a spreadsheet-style viewer. That is only useful in an
# interactive session, so skip it when the script is run via Rscript or knitted.
if (interactive()) {
  View(netflix_data)
}
# ---------------------------------------------------
# 1 Structure of Dataset
# ---------------------------------------------------

# Compact overview: each column's type and its first few values
str(netflix_data)
## 'data.frame':    8807 obs. of  12 variables:
##  $ show_id     : chr  "s1" "s2" "s3" "s4" ...
##  $ type        : chr  "Movie" "TV Show" "TV Show" "TV Show" ...
##  $ title       : chr  "Dick Johnson Is Dead" "Blood & Water" "Ganglands" "Jailbirds New Orleans" ...
##  $ director    : chr  "Kirsten Johnson" "" "Julien Leclercq" "" ...
##  $ cast        : chr  "" "Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile "| __truncated__ "Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabiha Akkari, Sofia Lesaffre, Salim Kechiouche, Noureddine Farihi, G"| __truncated__ "" ...
##  $ country     : chr  "United States" "South Africa" "" "" ...
##  $ date_added  : chr  "September 25, 2021" "September 24, 2021" "September 24, 2021" "September 24, 2021" ...
##  $ release_year: int  2020 2021 2021 2021 2021 2021 2021 1993 2021 2021 ...
##  $ rating      : chr  "PG-13" "TV-MA" "TV-MA" "TV-MA" ...
##  $ duration    : chr  "90 min" "2 Seasons" "1 Season" "1 Season" ...
##  $ listed_in   : chr  "Documentaries" "International TV Shows, TV Dramas, TV Mysteries" "Crime TV Shows, International TV Shows, TV Action & Adventure" "Docuseries, Reality TV" ...
##  $ description : chr  "As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical wa"| __truncated__ "After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is h"| __truncated__ "To protect his family from a powerful drug lord, skilled thief Mehdi and his expert team of robbers are pulled "| __truncated__ "Feuds, flirtations and toilet talk go down among the incarcerated women at the Orleans Justice Center in New Or"| __truncated__ ...
# Column names of the dataset
names(netflix_data)
##  [1] "show_id"      "type"         "title"        "director"     "cast"        
##  [6] "country"      "date_added"   "release_year" "rating"       "duration"    
## [11] "listed_in"    "description"
# First rows of the dataset (head() shows six rows by default)
head(netflix_data)
##   show_id    type                 title        director
## 1      s1   Movie  Dick Johnson Is Dead Kirsten Johnson
## 2      s2 TV Show         Blood & Water                
## 3      s3 TV Show             Ganglands Julien Leclercq
## 4      s4 TV Show Jailbirds New Orleans                
## 5      s5 TV Show          Kota Factory                
## 6      s6 TV Show         Midnight Mass   Mike Flanagan
##                                                                                                                                                                                                                                                                                                              cast
## 1                                                                                                                                                                                                                                                                                                                
## 2 Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile Tshabalala, Getmore Sithole, Cindy Mahlangu, Ryle De Morny, Greteli Fincham, Sello Maake Ka-Ncube, Odwa Gwanya, Mekaila Mathys, Sandi Schultz, Duane Williams, Shamilla Miller, Patrick Mofokeng
## 3                                                                                                                                                             Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabiha Akkari, Sofia Lesaffre, Salim Kechiouche, Noureddine Farihi, Geert Van Rampelberg, Bakary Diombera
## 4                                                                                                                                                                                                                                                                                                                
## 5                                                                                                                                                                                                        Mayur More, Jitendra Kumar, Ranjan Raj, Alam Khan, Ahsaas Channa, Revathi Pillai, Urvi Singh, Arun Kumar
## 6                                                                        Kate Siegel, Zach Gilford, Hamish Linklater, Henry Thomas, Kristin Lehman, Samantha Sloyan, Igby Rigney, Rahul Kohli, Annarah Cymone, Annabeth Gish, Alex Essoe, Rahul Abburi, Matt Biedel, Michael Trucco, Crystal Balint, Louis Oliver
##         country         date_added release_year rating  duration
## 1 United States September 25, 2021         2020  PG-13    90 min
## 2  South Africa September 24, 2021         2021  TV-MA 2 Seasons
## 3               September 24, 2021         2021  TV-MA  1 Season
## 4               September 24, 2021         2021  TV-MA  1 Season
## 5         India September 24, 2021         2021  TV-MA 2 Seasons
## 6               September 24, 2021         2021  TV-MA  1 Season
##                                                       listed_in
## 1                                                 Documentaries
## 2               International TV Shows, TV Dramas, TV Mysteries
## 3 Crime TV Shows, International TV Shows, TV Action & Adventure
## 4                                        Docuseries, Reality TV
## 5        International TV Shows, Romantic TV Shows, TV Comedies
## 6                            TV Dramas, TV Horror, TV Mysteries
##                                                                                                                                                description
## 1 As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable.
## 2      After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is her sister who was abducted at birth.
## 3       To protect his family from a powerful drug lord, skilled thief Mehdi and his expert team of robbers are pulled into a violent and deadly turf war.
## 4      Feuds, flirtations and toilet talk go down among the incarcerated women at the Orleans Justice Center in New Orleans on this gritty reality series.
## 5 In a city of coaching centers known to train India’s finest collegiate minds, an earnest but unexceptional student and his friends navigate campus life.
## 6 The arrival of a charismatic young priest brings glorious miracles, ominous mysteries and renewed religious fervor to a dying town desperate to believe.
# Last rows of the dataset (tail() shows six rows by default)
tail(netflix_data)
##      show_id    type       title        director
## 8802   s8802   Movie     Zinzana Majid Al Ansari
## 8803   s8803   Movie      Zodiac   David Fincher
## 8804   s8804 TV Show Zombie Dumb                
## 8805   s8805   Movie  Zombieland Ruben Fleischer
## 8806   s8806   Movie        Zoom    Peter Hewitt
## 8807   s8807   Movie      Zubaan     Mozez Singh
##                                                                                                                                                             cast
## 8802                                                                                          Ali Suliman, Saleh Bakri, Yasa, Ali Al-Jabri, Mansoor Alfeeli, Ahd
## 8803 Mark Ruffalo, Jake Gyllenhaal, Robert Downey Jr., Anthony Edwards, Brian Cox, Elias Koteas, Donal Logue, John Carroll Lynch, Dermot Mulroney, Chloë Sevigny
## 8804                                                                                                                                                            
## 8805                                                         Jesse Eisenberg, Woody Harrelson, Emma Stone, Abigail Breslin, Amber Heard, Bill Murray, Derek Graf
## 8806                                     Tim Allen, Courteney Cox, Chevy Chase, Kate Mara, Ryan Newman, Michael Cassidy, Spencer Breslin, Rip Torn, Kevin Zegers
## 8807                       Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana, Manish Chaudhary, Meghna Malik, Malkeet Rauni, Anita Shabdish, Chittaranjan Tripathy
##                           country        date_added release_year rating
## 8802 United Arab Emirates, Jordan     March 9, 2016         2015  TV-MA
## 8803                United States November 20, 2019         2007      R
## 8804                                   July 1, 2019         2018  TV-Y7
## 8805                United States  November 1, 2019         2009      R
## 8806                United States  January 11, 2020         2006     PG
## 8807                        India     March 2, 2019         2015  TV-14
##       duration                                      listed_in
## 8802    96 min        Dramas, International Movies, Thrillers
## 8803   158 min                 Cult Movies, Dramas, Thrillers
## 8804 2 Seasons         Kids' TV, Korean TV Shows, TV Comedies
## 8805    88 min                        Comedies, Horror Movies
## 8806    88 min             Children & Family Movies, Comedies
## 8807   111 min Dramas, International Movies, Music & Musicals
##                                                                                                                                                 description
## 8802               Recovering alcoholic Talal wakes up inside a small-town police station cell, where he's subject to the mind games of a psychotic sadist.
## 8803 A political cartoonist, a crime reporter and a pair of cops investigate San Francisco's infamous Zodiac Killer in this thriller based on a true story.
## 8804                               While living alone in a spooky town, a young girl befriends a motley crew of zombie children with diverse personalities.
## 8805              Looking to survive in a world taken over by zombies, a dorky college student teams with an urban roughneck and a pair of grifter sisters.
## 8806  Dragged from civilian life, a former superhero must train a new crop of youthful saviors when the military preps for an attack by a familiar villain.
## 8807                A scrappy but poor boy worms his way into a tycoon's dysfunctional family, while facing his fear of music and the truth about his past.
# Per-column summary; the character columns report blank-string counts (N.blank)
summary(netflix_data)
##       show_id            type            title           director   
##  Length   :8807   Length   :8807   Length   :8807   Length   :8807  
##  N.unique :8807   N.unique :   2   N.unique :8807   N.unique :4529  
##  N.blank  :   0   N.blank  :   0   N.blank  :   0   N.blank  :2634  
##  Min.nchar:   2   Min.nchar:   5   Min.nchar:   1   Min.nchar:   0  
##  Max.nchar:   5   Max.nchar:   7   Max.nchar: 104   Max.nchar: 208  
##                                                                     
##         cast           country         date_added    release_year 
##  Length   :8807   Length   :8807   Length   :8807   Min.   :1925  
##  N.unique :7693   N.unique : 749   N.unique :1768   1st Qu.:2013  
##  N.blank  : 825   N.blank  : 831   N.blank  :  10   Median :2017  
##  Min.nchar:   0   Min.nchar:   0   Min.nchar:   0   Mean   :2014  
##  Max.nchar: 771   Max.nchar: 123   Max.nchar:  19   3rd Qu.:2019  
##                                                     Max.   :2021  
##        rating          duration        listed_in       description  
##  Length   :8807   Length   :8807   Length   :8807   Length   :8807  
##  N.unique :  18   N.unique : 221   N.unique : 514   N.unique :8775  
##  N.blank  :   4   N.blank  :   3   N.blank  :   0   N.blank  :   0  
##  Min.nchar:   0   Min.nchar:   0   Min.nchar:   6   Min.nchar:  61  
##  Max.nchar:   8   Max.nchar:  10   Max.nchar:  79   Max.nchar: 248  
## 
# Number of rows and columns
dim(netflix_data)
## [1] 8807   12
# ---------------------------------------------------
# 2 Convert Categorical Variables
# ---------------------------------------------------

# Treat the content type, rating and country columns as categorical (factor)
# variables; every other column is left as it was read.
netflix_data <- netflix_data %>%
  mutate(across(c(type, rating, country), as.factor))
# ---------------------------------------------------
# 3 Missing Values
# ---------------------------------------------------

# Missing text fields were read as empty strings rather than NA (see the blank
# director/cast/country values printed above), so is.na() on its own reports
# zero missing values for every column. Count NA and blank entries together.
vapply(
  netflix_data,
  function(column) sum(is.na(column) | as.character(column) == ""),
  integer(1)
)
# NOTE: recorded output removed because it came from the is.na()-only check and
# showed 0 for every column. Re-run to regenerate; the counts are presumably
# those of the N.blank row of summary(netflix_data) (director 2634, cast 825,
# country 831, date_added 10, rating 4, duration 3) -- confirm after re-running.
# ---------------------------------------------------
# 4 Filter Movies Released After 2018
# ---------------------------------------------------

# Keep only movies released after 2018. Without the type condition the result
# also contained TV shows, contradicting both the section title and the
# variable name.
latest_movies <- netflix_data %>%
  filter(type == "Movie", release_year > 2018) %>%
  select(title, type, release_year)

head(latest_movies)
# NOTE: recorded output removed because it was produced before the type filter
# was added and listed TV shows. Re-run to regenerate.
# ---------------------------------------------------
# 5 Top 10 Latest Releases
# ---------------------------------------------------

# Sort all titles from the newest release year downwards and keep the first
# ten rows; titles sharing a release year stay in their original file order.
top_latest <- head(arrange(netflix_data, desc(release_year)), n = 10)

top_latest
##    show_id    type                               title
## 1       s2 TV Show                       Blood & Water
## 2       s3 TV Show                           Ganglands
## 3       s4 TV Show               Jailbirds New Orleans
## 4       s5 TV Show                        Kota Factory
## 5       s6 TV Show                       Midnight Mass
## 6       s7   Movie    My Little Pony: A New Generation
## 7       s9 TV Show       The Great British Baking Show
## 8      s10   Movie                        The Starling
## 9      s11 TV Show Vendetta: Truth, Lies and The Mafia
## 10     s12 TV Show                    Bangkok Breaking
##                         director
## 1                               
## 2                Julien Leclercq
## 3                               
## 4                               
## 5                  Mike Flanagan
## 6  Robert Cullen, José Luis Ucha
## 7                Andy Devonshire
## 8                 Theodore Melfi
## 9                               
## 10             Kongkiat Komesiri
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    cast
## 1                                                                                                                                                                                                                                       Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile Tshabalala, Getmore Sithole, Cindy Mahlangu, Ryle De Morny, Greteli Fincham, Sello Maake Ka-Ncube, Odwa Gwanya, Mekaila Mathys, Sandi Schultz, Duane Williams, Shamilla Miller, Patrick Mofokeng
## 2                                                                                                                                                                                                                                                                                                                                                                                                   Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabiha Akkari, Sofia Lesaffre, Salim Kechiouche, Noureddine Farihi, Geert Van Rampelberg, Bakary Diombera
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                              Mayur More, Jitendra Kumar, Ranjan Raj, Alam Khan, Ahsaas Channa, Revathi Pillai, Urvi Singh, Arun Kumar
## 5                                                                                                                                                                                                                                                                                                              Kate Siegel, Zach Gilford, Hamish Linklater, Henry Thomas, Kristin Lehman, Samantha Sloyan, Igby Rigney, Rahul Kohli, Annarah Cymone, Annabeth Gish, Alex Essoe, Rahul Abburi, Matt Biedel, Michael Trucco, Crystal Balint, Louis Oliver
## 6                                                                                                                                                                                                                                                                                                                                                                                                     Vanessa Hudgens, Kimiko Glenn, James Marsden, Sofia Carson, Liza Koshy, Ken Jeong, Elizabeth Perkins, Jane Krakowski, Michael McKean, Phil LaMarr
## 7                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 Mel Giedroyc, Sue Perkins, Mary Berry, Paul Hollywood
## 8                                                                                                                                                                                                                                                                                                                                                                                Melissa McCarthy, Chris O'Dowd, Kevin Kline, Timothy Olyphant, Daveed Diggs, Skyler Gisondo, Laura Harrier, Rosalind Chao, Kimberly Quinn, Loretta Devine, Ravi Kapoor
## 9                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
## 10 Sukollawat Kanarot, Sushar Manaying, Pavarit Mongkolpisit, Sahajak Boonthanakit, Suthipongse Thatphithakkul, Bhasaworn Bawronkirati, Daweerit Chullasapya, Waratthaya Wongchayaporn, Kittiphoom Wongpentak, Abhicha Thanachanun, Nophand Boonyai, Kittipong Khamsat, Arisara Wongchalee, Jaytiya Naiwattanakul, Pantipa Arunwattanachai, Panupan Jantanawong, Kungtap Saelim, Phumphat Chartsuriyakiat, Issara Veranitinunt, Keerati Sivakuae, Panjai Sirisuwan, Supranee Charoenpol, Suda Chuenban, Visaka Banhansupavat, Pitchatorn Santinatornkul
##           country         date_added release_year rating  duration
## 1    South Africa September 24, 2021         2021  TV-MA 2 Seasons
## 2                 September 24, 2021         2021  TV-MA  1 Season
## 3                 September 24, 2021         2021  TV-MA  1 Season
## 4           India September 24, 2021         2021  TV-MA 2 Seasons
## 5                 September 24, 2021         2021  TV-MA  1 Season
## 6                 September 24, 2021         2021     PG    91 min
## 7  United Kingdom September 24, 2021         2021  TV-14 9 Seasons
## 8   United States September 24, 2021         2021  PG-13   104 min
## 9                 September 24, 2021         2021  TV-MA  1 Season
## 10                September 23, 2021         2021  TV-MA  1 Season
##                                                        listed_in
## 1                International TV Shows, TV Dramas, TV Mysteries
## 2  Crime TV Shows, International TV Shows, TV Action & Adventure
## 3                                         Docuseries, Reality TV
## 4         International TV Shows, Romantic TV Shows, TV Comedies
## 5                             TV Dramas, TV Horror, TV Mysteries
## 6                                       Children & Family Movies
## 7                                   British TV Shows, Reality TV
## 8                                               Comedies, Dramas
## 9             Crime TV Shows, Docuseries, International TV Shows
## 10 Crime TV Shows, International TV Shows, TV Action & Adventure
##                                                                                                                                                  description
## 1        After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is her sister who was abducted at birth.
## 2         To protect his family from a powerful drug lord, skilled thief Mehdi and his expert team of robbers are pulled into a violent and deadly turf war.
## 3        Feuds, flirtations and toilet talk go down among the incarcerated women at the Orleans Justice Center in New Orleans on this gritty reality series.
## 4   In a city of coaching centers known to train India’s finest collegiate minds, an earnest but unexceptional student and his friends navigate campus life.
## 5   The arrival of a charismatic young priest brings glorious miracles, ominous mysteries and renewed religious fervor to a dying town desperate to believe.
## 6  Equestria's divided. But a bright-eyed hero believes Earth Ponies, Pegasi and Unicorns should be pals — and, hoof to heart, she’s determined to prove it.
## 7           A talented batch of amateur bakers face off in a 10-week competition, whipping up their best dishes in the hopes of being named the U.K.'s best.
## 8    A woman adjusting to life after a loss contends with a feisty bird that's taken over her garden — and a husband who's struggling to find a way forward.
## 9   Sicily boasts a bold "Anti-Mafia" coalition. But what happens when those trying to bring down organized crime are accused of being criminals themselves?
## 10                       Struggling to earn a living in Bangkok, a man joins an emergency rescue service and realizes he must unravel a citywide conspiracy.
# ---------------------------------------------------
# 6 Ranking Shows by Release Year
# ---------------------------------------------------

# Order titles from newest to oldest release year, then number the rows
# 1, 2, 3, ... in that order. Titles from the same year get consecutive
# (not tied) ranks.
sorted_by_year <- arrange(netflix_data, desc(release_year))
rank_show <- mutate(sorted_by_year, rank = seq_len(n()))
rm(sorted_by_year)

rank_show %>% head()
##   show_id    type                            title
## 1      s2 TV Show                    Blood & Water
## 2      s3 TV Show                        Ganglands
## 3      s4 TV Show            Jailbirds New Orleans
## 4      s5 TV Show                     Kota Factory
## 5      s6 TV Show                    Midnight Mass
## 6      s7   Movie My Little Pony: A New Generation
##                        director
## 1                              
## 2               Julien Leclercq
## 3                              
## 4                              
## 5                 Mike Flanagan
## 6 Robert Cullen, José Luis Ucha
##                                                                                                                                                                                                                                                                                                              cast
## 1 Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile Tshabalala, Getmore Sithole, Cindy Mahlangu, Ryle De Morny, Greteli Fincham, Sello Maake Ka-Ncube, Odwa Gwanya, Mekaila Mathys, Sandi Schultz, Duane Williams, Shamilla Miller, Patrick Mofokeng
## 2                                                                                                                                                             Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabiha Akkari, Sofia Lesaffre, Salim Kechiouche, Noureddine Farihi, Geert Van Rampelberg, Bakary Diombera
## 3                                                                                                                                                                                                                                                                                                                
## 4                                                                                                                                                                                                        Mayur More, Jitendra Kumar, Ranjan Raj, Alam Khan, Ahsaas Channa, Revathi Pillai, Urvi Singh, Arun Kumar
## 5                                                                        Kate Siegel, Zach Gilford, Hamish Linklater, Henry Thomas, Kristin Lehman, Samantha Sloyan, Igby Rigney, Rahul Kohli, Annarah Cymone, Annabeth Gish, Alex Essoe, Rahul Abburi, Matt Biedel, Michael Trucco, Crystal Balint, Louis Oliver
## 6                                                                                                                                                               Vanessa Hudgens, Kimiko Glenn, James Marsden, Sofia Carson, Liza Koshy, Ken Jeong, Elizabeth Perkins, Jane Krakowski, Michael McKean, Phil LaMarr
##        country         date_added release_year rating  duration
## 1 South Africa September 24, 2021         2021  TV-MA 2 Seasons
## 2              September 24, 2021         2021  TV-MA  1 Season
## 3              September 24, 2021         2021  TV-MA  1 Season
## 4        India September 24, 2021         2021  TV-MA 2 Seasons
## 5              September 24, 2021         2021  TV-MA  1 Season
## 6              September 24, 2021         2021     PG    91 min
##                                                       listed_in
## 1               International TV Shows, TV Dramas, TV Mysteries
## 2 Crime TV Shows, International TV Shows, TV Action & Adventure
## 3                                        Docuseries, Reality TV
## 4        International TV Shows, Romantic TV Shows, TV Comedies
## 5                            TV Dramas, TV Horror, TV Mysteries
## 6                                      Children & Family Movies
##                                                                                                                                                 description
## 1       After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is her sister who was abducted at birth.
## 2        To protect his family from a powerful drug lord, skilled thief Mehdi and his expert team of robbers are pulled into a violent and deadly turf war.
## 3       Feuds, flirtations and toilet talk go down among the incarcerated women at the Orleans Justice Center in New Orleans on this gritty reality series.
## 4  In a city of coaching centers known to train India’s finest collegiate minds, an earnest but unexceptional student and his friends navigate campus life.
## 5  The arrival of a charismatic young priest brings glorious miracles, ominous mysteries and renewed religious fervor to a dying town desperate to believe.
## 6 Equestria's divided. But a bright-eyed hero believes Earth Ponies, Pegasi and Unicorns should be pals — and, hoof to heart, she’s determined to prove it.
##   rank
## 1    1
## 2    2
## 3    3
## 4    4
## 5    5
## 6    6
# ---------------------------------------------------
# 7 Count of Movies and TV Shows
# ---------------------------------------------------

# Bar chart: number of titles per content type, one coloured bar per type
ggplot(data = netflix_data, mapping = aes(x = type, fill = type)) +
  geom_bar() +
  ggtitle("Movies vs TV Shows") +
  xlab("Type") +
  ylab("Count")

# ---------------------------------------------------
# 8 Release Year Distribution
# ---------------------------------------------------

# Histogram of release years in two-year bins (red bars with a black outline)
ggplot(netflix_data, aes(release_year)) +
  geom_histogram(binwidth = 2, colour = "black", fill = "red") +
  labs(x = "Release Year", y = "Count", title = "Release Year Distribution")

# ---------------------------------------------------
# 9 Rating Distribution
# ---------------------------------------------------

# Bar chart: number of titles per content rating, one coloured bar per rating
ggplot(data = netflix_data, mapping = aes(x = rating, fill = rating)) +
  geom_bar() +
  ggtitle("Content Rating Distribution") +
  xlab("Rating") +
  ylab("Count")

# ---------------------------------------------------
# 10 Movies and TV Shows by Release Year
# ---------------------------------------------------

# Stacked histogram of release years in three-year bins, split by content type
ggplot(netflix_data) +
  aes(x = release_year, fill = type) +
  geom_histogram(binwidth = 3) +
  labs(x = "Release Year", y = "Count", title = "Movies and TV Shows by Release Year")

# ---------------------------------------------------
# 11 Top Countries Producing Content
# ---------------------------------------------------

# Ten most frequent values of the country column.
# Rows with no country recorded (blank or NA) are dropped first; otherwise the
# blank value is ranked as if it were a country (it placed third before).
# Co-productions are stored as one combined value (e.g.
# "United Arab Emirates, Jordan") and are counted under that combined value.
top_country <- netflix_data %>%
  filter(!is.na(country), country != "") %>%
  group_by(country) %>%
  summarise(total = n()) %>%
  arrange(desc(total)) %>%
  head(10)

top_country
# NOTE: recorded output removed because it was produced before blank countries
# were excluded and included a "" row with 831 titles. Re-run to regenerate.
# Horizontal bar chart of the top countries, ordered by their title count.
# geom_col() draws bars whose height is the y value, i.e. the precomputed total.
ggplot(top_country, aes(x = reorder(country, total), y = total)) +
  geom_col(fill = "blue") +
  coord_flip() +
  ggtitle("Top Countries Producing Netflix Content") +
  xlab("Country") +
  ylab("Count")

# ---------------------------------------------------
# 12 TV Shows vs Movies Boxplot
# ---------------------------------------------------

# Box plot comparing the spread of release years between the content types
ggplot(data = netflix_data, mapping = aes(x = type, y = release_year, fill = type)) +
  geom_boxplot() +
  ggtitle("Release Year by Type") +
  xlab("Type") +
  ylab("Release Year")

# ---------------------------------------------------
# 13 CDF of Release Year
# ---------------------------------------------------

# Empirical cumulative distribution function of release year, drawn with
# base graphics.
plot(
  ecdf(netflix_data[["release_year"]]),
  col = "blue",
  main = "CDF of Release Year",
  xlab = "Release Year",
  ylab = "Cumulative Probability"
)

# ---------------------------------------------------
# 14 Density of Release Years
# ---------------------------------------------------

# Density estimate of release year, drawn as a filled curve.
netflix_data %>%
  ggplot(aes(x = release_year)) +
  geom_density(fill = "lightblue") +
  ggtitle("Density Plot of Release Years") +
  xlab("Release Year") +
  ylab("Density")

# ---------------------------------------------------
# 15 Create Release Groups
# ---------------------------------------------------

# Bucket every title into a release era.
# The outer breaks are open-ended because cut() returns NA for values outside
# its breaks and its intervals are right-closed: with the previous outer
# breaks of 1980 and 2025, any title released in 1980 or earlier (or after
# 2025) fell into no group and ended up as NA.
#   Old:    release_year <= 2000
#   Middle: 2001 to 2010
#   New:    release_year >= 2011
netflix_data$release_group <- cut(
  netflix_data$release_year,
  breaks = c(-Inf, 2000, 2010, Inf),
  labels = c("Old", "Middle", "New")
)

# Bar chart of titles per release era, split by title type.
ggplot(netflix_data,
       aes(x = release_group,
           fill = type)) +
  geom_bar() +
  labs(
    title = "Release Groups",
    x = "Group",
    y = "Count"
  )

# ---------------------------------------------------
# 16 ANOVA for Release Year and Type
# ---------------------------------------------------

# One-way ANOVA of release year against title type.
anova_type <- aov(release_year ~ type, data = netflix_data)

# Print the ANOVA table (F test for the `type` term).
summary(anova_type)
##               Df Sum Sq Mean Sq F value Pr(>F)    
## type           1  22615   22615   300.7 <2e-16 ***
## Residuals   8805 662318      75                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ---------------------------------------------------
# 17 Correlation between Release Year and Duration
# ---------------------------------------------------

# Convert duration into numeric
# Convert duration into numeric by deleting every non-digit character.
# NOTE(review): the sample values of `duration` include both "90 min" and
# "2 Seasons", so duration_num pools minutes and season counts in one column.
# Confirm that the analyses built on it are meant to mix both units.
netflix_data[["duration_num"]] <- as.numeric(
  gsub("[^0-9]", "", netflix_data[["duration"]])
)

# Correlation between release year and duration, computed only over rows
# where both values are present.
with(
  netflix_data,
  cor(release_year, duration_num, use = "complete.obs")
)
## [1] -0.2491815
# ---------------------------------------------------
# 18 Single Regression
# ---------------------------------------------------

# Simple linear regression: duration_num as a linear function of release_year.
# Rows with a missing duration_num are dropped by lm() (see the
# "observations deleted" line in the summary below).
single_reg <- lm(duration_num ~ release_year, data = netflix_data)

# Coefficients, residual quantiles and fit statistics for the model.
summary(single_reg)
## 
## Call:
## lm(formula = duration_num ~ release_year, data = netflix_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -196.87  -59.49   16.91   36.77  247.64 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2961.22063  119.77939   24.72   <2e-16 ***
## release_year   -1.43551    0.05947  -24.14   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 49.21 on 8802 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.06209,    Adjusted R-squared:  0.06198 
## F-statistic: 582.7 on 1 and 8802 DF,  p-value: < 2.2e-16

# ---------------------------------------------------
# 19 Regression Plot
# ---------------------------------------------------

# Scatter plot of duration against release year with the straight-line
# least-squares fit overlaid (no confidence band).
# This section had been mangled: the header was fused with a stray code fence,
# the code was collapsed onto one line, and the string quotes were
# typographic (curly) quotes, which R cannot parse.
ggplot(netflix_data,
       aes(x = release_year,
           y = duration_num)) +
  geom_point(color = "purple") +
  geom_smooth(
    method = "lm",
    formula = y ~ x,  # stated explicitly so ggplot2 does not print a message
    se = FALSE,
    color = "red"
  ) +
  labs(
    title = "Release Year vs Duration",
    x = "Release Year",
    y = "Duration"
  )

# ---------------------------------------------------
# 20 Multiple Regression
# ---------------------------------------------------

# Multiple regression: duration_num explained by release year AND title type.
# The formula previously contained release_year alone, which made this model
# an exact duplicate of single_reg rather than a multiple regression.
# `type` is the second predictor because the sample durations differ in kind
# between the two types ("90 min" for a movie, "2 Seasons" for a TV show).
multiple_reg <- lm(
  duration_num ~ release_year + type,
  data = netflix_data
)

summary(multiple_reg)
# (Printed summary omitted: the output previously pasted here belonged to the
# single-predictor model. Re-run to regenerate it for this formula.)
# ---------------------------------------------------
# 21 Polynomial Regression Degree 2
# ---------------------------------------------------

# Degree-2 polynomial regression of duration_num on release_year; the two
# polynomial terms are generated by poly().
poly_reg <- lm(duration_num ~ poly(release_year, 2), data = netflix_data)

# Coefficients and fit statistics for the quadratic model.
summary(poly_reg)
## 
## Call:
## lm(formula = duration_num ~ poly(release_year, 2), data = netflix_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -115.86  -54.97   13.97   37.03  249.83 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               69.8480     0.5173  135.02   <2e-16 ***
## poly(release_year, 2)1 -1188.0764    48.5401  -24.48   <2e-16 ***
## poly(release_year, 2)2  -764.0140    48.5419  -15.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.54 on 8801 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.08777,    Adjusted R-squared:  0.08756 
## F-statistic: 423.4 on 2 and 8801 DF,  p-value: < 2.2e-16
# ---------------------------------------------------
# 22 Polynomial Regression Plot
# ---------------------------------------------------

# Scatter plot of duration against release year with a quadratic fit
# (y ~ poly(x, 2)) drawn on top, without a confidence band.
netflix_data %>%
  ggplot(aes(x = release_year, y = duration_num)) +
  geom_point(color = "darkgreen") +
  stat_smooth(
    color = "blue",
    se = FALSE,
    formula = y ~ poly(x, 2),
    method = "lm"
  ) +
  ggtitle("Polynomial Regression") +
  xlab("Release Year") +
  ylab("Duration")
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

# ---------------------------------------------------
# 23 QQ Plot
# ---------------------------------------------------

# Normal Q-Q plot of duration_num, followed by the reference line in red.
qqnorm(netflix_data[["duration_num"]])
qqline(netflix_data[["duration_num"]], col = "red")

# ---------------------------------------------------
# 24 Residual Plot
# ---------------------------------------------------

# Residuals of single_reg plotted against its fitted values, with a red
# horizontal reference line at zero.
plot(
  x = single_reg[["fitted.values"]],
  y = single_reg[["residuals"]],
  col = "blue",
  main = "Residual Plot",
  xlab = "Fitted Values",
  ylab = "Residuals"
)
abline(h = 0, col = "red")

# ---------------------------------------------------
# 25 Scatter Plot Matrix
# ---------------------------------------------------

# Pairwise scatter plots of the two numeric columns used in the regressions.
pairs(
  netflix_data[c("release_year", "duration_num")],
  main = "Scatter Plot Matrix"
)

# ---------------------------------------------------
# 26 Outlier Detection
# ---------------------------------------------------

# Single box plot of duration_num; points beyond the whiskers are the
# candidate outliers.
boxplot(
  netflix_data[["duration_num"]],
  col = "pink",
  main = "Outlier Detection in Duration"
)

# ---------------------------------------------------
# 27 Top 10 Directors
# ---------------------------------------------------

# Rank the `director` values by number of titles and keep the ten largest.
# Rows whose director is missing or blank are dropped first; otherwise the
# empty string is counted as a director (it previously ranked first with
# 2634 titles).
# NOTE(review): `director` is grouped exactly as stored, so a value naming
# several people (e.g. "Raúl Campos, Jan Suter") counts as one entry.
top_director <- netflix_data %>%
  filter(!is.na(director), trimws(as.character(director)) != "") %>%
  group_by(director) %>%
  summarise(total = n()) %>%
  arrange(desc(total)) %>%
  head(10)

top_director
# (Printed table omitted: re-run to regenerate it now that the blank-director
# row is excluded and another director enters the top ten.)
# ---------------------------------------------------
# 28 Top Genres
# ---------------------------------------------------

# Ten most frequent `listed_in` values.
# NOTE(review): `listed_in` is counted as a whole string, so the table ranks
# genre combinations (e.g. "Dramas, International Movies") rather than
# individual genres -- confirm that this is the intended meaning.
top_genres <- netflix_data %>%
  group_by(listed_in) %>%
  tally(name = "total") %>%
  arrange(desc(total)) %>%
  slice_head(n = 10)

top_genres
## # A tibble: 10 × 2
##    listed_in                                        total
##    <chr>                                            <int>
##  1 Dramas, International Movies                       362
##  2 Documentaries                                      359
##  3 Stand-Up Comedy                                    334
##  4 Comedies, Dramas, International Movies             274
##  5 Dramas, Independent Movies, International Movies   252
##  6 Kids' TV                                           220
##  7 Children & Family Movies                           215
##  8 Children & Family Movies, Comedies                 201
##  9 Documentaries, International Movies                186
## 10 Dramas, International Movies, Romantic Movies      180
# ---------------------------------------------------
# 29 Content Rating vs Type
# ---------------------------------------------------

# Count of titles per content rating, with one bar per title type placed
# side by side (dodged) within each rating.
netflix_data %>%
  ggplot(aes(x = rating, fill = type)) +
  geom_bar(position = "dodge") +
  ggtitle("Rating vs Type") +
  xlab("Rating") +
  ylab("Count")

# ---------------------------------------------------
# 30 Final Distribution of Netflix Content
# ---------------------------------------------------

# Count of titles per type, with each bar split by the release_group column
# created in section 15.
ggplot(
  data = netflix_data,
  mapping = aes(x = type, fill = release_group)
) +
  geom_bar() +
  ggtitle("Netflix Content Distribution") +
  xlab("Type") +
  ylab("Count")