library(robotstxt)
library(rvest)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Question 5

# Ask the site's robots.txt whether this credits page may be scraped.
paths_allowed(
  paths = "https://imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
)
##  imdb.com
## [1] TRUE

Question 4 & 6

# Download and parse the full cast & crew page, then show the parsed document.
CastCrew_html <-
  "https://imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm" %>%
  read_html()
CastCrew_html
## {html_document}
## <html xmlns:og="http://ogp.me/ns#" xmlns:fb="http://www.facebook.com/2008/fbml">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
## [2] <body id="styleguide-v2" class="fixed">\n            <img height="1" widt ...
# Collect every <table> node on the page so the one of interest can be picked
# out by position.
table_html <- CastCrew_html %>%
  html_elements(css = "table")
table_html
## {xml_nodeset (30)}
##  [1] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [2] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [3] <table class="cast_list">\n<tr><td colspan="4" class="castlist_label"></ ...
##  [4] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [5] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [6] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [7] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [8] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [9] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [10] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [11] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [12] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [13] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [14] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [15] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [16] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [17] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [18] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [19] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [20] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## ...
# Convert the third table node (the one printed with class "cast_list") into
# a tibble. Single-bracket indexing keeps a node set, so the result is a
# list holding one tibble.
tibble_list <- table_html[3] %>%
  html_table()
tibble_list
## [[1]]
## # A tibble: 3,152 × 4
##    X1    X2               X3    X4                                              
##    <lgl> <chr>            <chr> <chr>                                           
##  1 NA    ""               ""    ""                                              
##  2 NA    "Angela Bassett" "..." "Athena Grant\n         / ...  \n              …
##  3 NA    ""               ""    ""                                              
##  4 NA    "Peter Krause"   "..." "Bobby Nash\n                  115 episodes, 20…
##  5 NA    ""               ""    ""                                              
##  6 NA    "Oliver Stark"   "..." "Evan 'Buck' Buckley\n                  115 epi…
##  7 NA    ""               ""    ""                                              
##  8 NA    "Aisha Hinds"    "..." "Henrietta 'Hen' Wilson\n                  115 …
##  9 NA    ""               ""    ""                                              
## 10 NA    "Kenneth Choi"   "..." "Howie 'Chimney' Han\n                  115 epi…
## # ℹ 3,142 more rows

Question 7

# Keep only the performer-name (X2) and role/episode (X4) columns of the cast
# table, then drop the blank spacer rows that sit between the real entries.
clean_tibble <- tibble_list[[1]]
CastCrew_list <- clean_tibble[, c(2, 4)]
CastCrew_list
# Filter on the column vectors themselves. `tbl[, 1]` on a tibble stays a
# one-column tibble, so `tbl[, 1] != ""` yields a logical matrix instead of
# the plain logical vector a row filter expects. filter() keeps rows where
# both conditions are TRUE (rows evaluating to NA are dropped, as subset()
# would).
CastCrew_list <- filter(CastCrew_list, X2 != "", X4 != "")
CastCrew_list

Question 8

# Rename the two retained cast columns.
# The names are assigned to CastCrew_list (two columns) rather than
# clean_tibble (four columns): supplying only two names for a four-column
# tibble triggers tibble's "must have the same length" and "can't be empty"
# warnings. A single assignment is enough; repeating it with colnames<- is
# redundant.
names(CastCrew_list) <- c("x2", "x4")

Question 9

# Fetch the credits page again (this time via the www host) and turn one
# specific credits table into a tibble.
CastCrew_html <-
  "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm" %>%
  read_html()
# NOTE(review): the selector addresses the table purely by its position
# inside #fullcredits_content (38th child); presumably this is the series
# visual-effects credits table -- confirm against the live page.
SeriesVisual_html <- CastCrew_html %>%
  html_element(css = "#fullcredits_content > table:nth-child(38)") %>%
  html_table()
SeriesVisual_html