I have decided to change my dataset because I wasn’t fully in love with the original one.
In fact, Movies & Series are something I am extremely passionate about.
In my new work, I focus on streaming platforms: I analyze which ones have the best movies, series, etc., to help you choose the right platform for you based on your movie taste.
I will join four datasets collected from Kaggle: Netflix, Hulu, Amazon, and Disney+.
library(tidyverse) # Load (attach) the tidyverse; it must already be installed
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.0
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Read the raw catalogue of each streaming service.
amazon <- read.csv(file = "data/amazon.csv")
disney <- read.csv(file = "data/disney.csv")
hulu <- read.csv(file = "data/hulu.csv")
netflix <- read.csv(file = "data/netflix.csv")

# Tag every row with the service it came from so the origin survives stacking.
amazon[["platform"]] <- "amazon"
disney[["platform"]] <- "disney"
hulu[["platform"]] <- "hulu"
netflix[["platform"]] <- "netflix"

# Stack the four catalogues and store the two categorical columns as factors.
df <- rbind(amazon, disney, hulu, netflix)
df[["platform"]] <- factor(df[["platform"]])
df[["type"]] <- factor(df[["type"]])

# Working copies of the combined table:
#   df_g is split into one row per genre further down,
#   df_c is split into one row per production country further down.
df_g <- df
df_c <- df
# Expand the list-like string columns (e.g. "['US']") into one row per value.
# Both columns go through the same three steps:
#   1. drop the square brackets,
#   2. split the remaining text on ", " into separate rows,
#   3. drop the single quotes around each value and store it as a factor.

# Genres: one row per (title, genre).
df_g$genres <- gsub("\\[|\\]", "", df_g$genres)
df_g <- separate_rows(df_g, genres, sep = ", ")
df_g$genres <- factor(gsub("'", "", df_g$genres))

# Production countries: one row per (title, country).
# The brackets are stripped on their own rather than as "['" / "']", so an
# empty list "[]" ends up as "" exactly like it does for genres, instead of
# surviving as a literal "[]" country value.
df_c$production_countries <- gsub("\\[|\\]", "", df_c$production_countries)
df_c <- separate_rows(df_c, production_countries, sep = ", ")
df_c$production_countries <- factor(gsub("'", "", df_c$production_countries))
# Persist each prepared table to its own .RData file in the working directory.
save(list = "df", file = "df.RData")
save(list = "df_g", file = "df_g.RData")
save(list = "df_c", file = "df_c.RData")
# Rows and columns of the combined, country-level and genre-level tables.
dim(df)
## [1] 19654 16
dim(df_c)
## [1] 22380 16
dim(df_g)
## [1] 48377 16
# Column types of the genre-level table.
str(df_g)
## tibble [48,377 × 16] (S3: tbl_df/tbl/data.frame)
## $ id : chr [1:48377] "ts20945" "ts20945" "ts20945" "ts20945" ...
## $ title : chr [1:48377] "The Three Stooges" "The Three Stooges" "The Three Stooges" "The Three Stooges" ...
## $ type : Factor w/ 2 levels "MOVIE","SHOW": 2 2 2 2 2 2 1 1 1 1 ...
## $ description : chr [1:48377] "The Three Stooges were an American vaudeville and comedy team active from 1922 until 1970, best known for their"| __truncated__ "The Three Stooges were an American vaudeville and comedy team active from 1922 until 1970, best known for their"| __truncated__ "The Three Stooges were an American vaudeville and comedy team active from 1922 until 1970, best known for their"| __truncated__ "The Three Stooges were an American vaudeville and comedy team active from 1922 until 1970, best known for their"| __truncated__ ...
## $ release_year : int [1:48377] 1934 1934 1934 1934 1934 1934 1926 1926 1926 1926 ...
## $ age_certification : chr [1:48377] "TV-PG" "TV-PG" "TV-PG" "TV-PG" ...
## $ runtime : int [1:48377] 19 19 19 19 19 19 78 78 78 78 ...
## $ genres : Factor w/ 20 levels "","action","animation",..: 4 9 3 2 10 12 2 7 19 20 ...
## $ production_countries: chr [1:48377] "['US']" "['US']" "['US']" "['US']" ...
## $ seasons : num [1:48377] 26 26 26 26 26 26 NA NA NA NA ...
## $ imdb_id : chr [1:48377] "tt0850645" "tt0850645" "tt0850645" "tt0850645" ...
## $ imdb_score : num [1:48377] 8.6 8.6 8.6 8.6 8.6 8.6 8.2 8.2 8.2 8.2 ...
## $ imdb_votes : num [1:48377] 1092 1092 1092 1092 1092 ...
## $ tmdb_popularity : num [1:48377] 15.4 15.4 15.4 15.4 15.4 ...
## $ tmdb_score : num [1:48377] 7.6 7.6 7.6 7.6 7.6 7.6 8 8 8 8 ...
## $ platform : Factor w/ 4 levels "amazon","disney",..: 1 1 1 1 1 1 1 1 1 1 ...
For more details about the app and instructions, you can check the Welcome page of the app!