# Load libraries
library(tidyr)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Import the Netflix titles dataset from a CSV file (path is relative to the
# current working directory)
Netflix <- read_csv(file = "Netflix (1).csv")
## Rows: 6234 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): type, title, director, cast, country, date_added, rating, duration...
## dbl (2): show_id, release_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build a long-format table with one row per (title, actor) pair:
#   1. drop titles whose `cast` is missing,
#   2. split the ", "-delimited `cast` string into one row per name,
#   3. rename `cast` to `actor` to reflect the new granularity.
Netflix_Actor <- Netflix %>%
  drop_na(cast) %>%
  separate_rows(cast, sep = ", ") %>%
  rename(actor = cast)
# Preview the first six rows
head(Netflix_Actor, n = 6L)
## # A tibble: 6 × 12
## show_id type title director actor country date_added release_year rating
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 81145628 Movie Norm of … Richard… Alan… United… September… 2019 TV-PG
## 2 81145628 Movie Norm of … Richard… Andr… United… September… 2019 TV-PG
## 3 81145628 Movie Norm of … Richard… Bria… United… September… 2019 TV-PG
## 4 81145628 Movie Norm of … Richard… Cole… United… September… 2019 TV-PG
## 5 81145628 Movie Norm of … Richard… Jenn… United… September… 2019 TV-PG
## 6 81145628 Movie Norm of … Richard… Jona… United… September… 2019 TV-PG
## # ℹ 3 more variables: duration <chr>, listed_in <chr>, description <chr>
# Find the 6 actors with the most TV show appearances.
# `count(actor, sort = TRUE)` tallies the rows per actor and orders them by
# descending count in a single step, returning an ungrouped tibble, so no
# explicit `group_by()` / `ungroup()` pair (or prior `select()`) is needed.
# NOTE: only the first 6 rows are kept; actors tied with the 6th place are
# not included.
Netflix_Actor %>%
  filter(type == "TV Show") %>%
  count(actor, sort = TRUE) %>%
  slice_head(n = 6)
## # A tibble: 6 × 2
## actor n
## <chr> <int>
## 1 Takahiro Sakurai 18
## 2 Yuki Kaji 16
## 3 Daisuke Ono 14
## 4 David Attenborough 14
## 5 Ashleigh Ball 12
## 6 Hiroshi Kamiya 12
# Summary statistics for the built-in `cars` dataset (not the Netflix data).
# NOTE(review): this looks like a leftover from a document template — confirm
# whether it is still needed.
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00