pacman::p_load(robotstxt, rvest)
# library(robotstxt)
# library(rvest)
# Ask robotstxt whether this full-credits URL may be scraped
paths_allowed(paths = "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm")

 www.imdb.com                      
[1] TRUE
# Fetch and parse the IMDb full-credits page, then show the parsed document
imdb_html <- read_html(x = "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm")
imdb_html
{html_document}
<html xmlns:og="http://ogp.me/ns#" xmlns:fb="http://www.facebook.com/2008/fbml">
[1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n<script type="text/j ...
[2] <body id="styleguide-v2" class="fixed">\n            <img height="1" width="1" style="display:non ...
# Collect every <table> element of the parsed page into a node set
table_html <- html_elements(x = imdb_html, css = "table")
table_html
{xml_nodeset (30)}
 [1] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
 [2] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
 [3] <table class="cast_list">\n<tr><td colspan="4" class="castlist_label"></td></tr>\n<tr class="odd ...
 [4] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
 [5] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
 [6] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
 [7] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
 [8] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
 [9] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
[10] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
[11] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
[12] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
[13] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
[14] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
[15] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
[16] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
[17] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
[18] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
[19] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
[20] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class="column1">\n<col class="c ...
...
# Convert the first table on the page into a tibble (data frame).
# NOTE: in the node set this table has class "simpleTable simpleCreditsTable";
# the table with class "cast_list" is the third one, not this one.
# `[[1]]` extracts the single <table> node so html_table() returns one data
# frame; `[1]` would keep a one-element node set and return a list instead.
cast_tibble <- html_table(table_html[[1]])
cast_tibble
[[1]]
NA
# Convert the second table on the page into a tibble (data frame).
# NOTE(review): presumably a crew credits table (its class is
# "simpleTable simpleCreditsTable") -- confirm which credit section it holds.
# `[[2]]` extracts the single <table> node so html_table() returns one data
# frame; `[2]` would keep a one-element node set and return a list instead.
crew_tibble <- html_table(table_html[[2]])
crew_tibble
[[1]]
NA
# Display cast_tibble again, exactly as produced by html_table() above
# (no cleaning step has been applied to it)
cast_tibble
[[1]]
NA
# Display crew_tibble again, exactly as produced by html_table() above
# (no cleaning step has been applied to it)
crew_tibble
[[1]]
NA
# Scrape the 3rd table (Series Cast; the one with class "cast_list").
# `[[3]]` extracts the single <table> node, so html_table() returns one data
# frame. `[3]` would keep a one-element node set and return a list, for which
# dim() is NULL.
series_cast_tibble <- html_table(table_html[[3]])

# Display the table
print(series_cast_tibble)
[[1]]
# Get dimensions of the table (Rows, Columns).
# html_table() returns a list of tables when it is given a node set, and dim()
# of a list is NULL, which made the message print with empty values. Unwrap the
# first element in that case; a plain data frame is used as-is.
cast_table <- series_cast_tibble
if (!is.data.frame(cast_table)) {
  cast_table <- cast_table[[1]]
}
cast_dim <- dim(cast_table)
print(paste("Rows:", cast_dim[1], "Columns:", cast_dim[2]))
[1] "Rows:  Columns: "
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCnBhY21hbjo6cF9sb2FkKHJvYm90c3R4dCwgcnZlc3QpDQojIGxpYnJhcnkocm9ib3RzdHh0KQ0KIyBsaWJyYXJ5KHJ2ZXN0KQ0KDQpgYGANCg0KYGBge3J9DQojIENoZWNrIGlmIHNjcmFwaW5nIGlzIGFsbG93ZWQNCnBhdGhzX2FsbG93ZWQoImh0dHBzOi8vd3d3LmltZGIuY29tL3RpdGxlL3R0NzIzNTQ2Ni9mdWxsY3JlZGl0cz9yZWZfPXR0X2NsX3NtIikNCg0KYGBgDQoNCmBgYHtyfQ0KIyBSZWFkIElNRGIgZnVsbCBjcmVkaXRzIHBhZ2UNCmltZGJfaHRtbCA8LSByZWFkX2h0bWwoImh0dHBzOi8vd3d3LmltZGIuY29tL3RpdGxlL3R0NzIzNTQ2Ni9mdWxsY3JlZGl0cz9yZWZfPXR0X2NsX3NtIikNCmltZGJfaHRtbA0KDQpgYGANCg0KYGBge3J9DQojIEV4dHJhY3QgdGFibGVzIGZyb20gdGhlIElNRGIgcGFnZQ0KdGFibGVfaHRtbCA8LSBodG1sX2VsZW1lbnRzKGltZGJfaHRtbCwgInRhYmxlIikNCnRhYmxlX2h0bWwNCg0KYGBgDQoNCmBgYHtyfQ0KIyBDb252ZXJ0IHRoZSBmaXJzdCB0YWJsZSAoY2FzdCBsaXN0KSBpbnRvIGEgdGliYmxlIChkYXRhIGZyYW1lKQ0KY2FzdF90aWJibGUgPC0gaHRtbF90YWJsZSh0YWJsZV9odG1sWzFdKQ0KY2FzdF90aWJibGUNCg0KYGBgDQpgYGB7cn0NCiMgRXh0cmFjdCBhbm90aGVyIHRhYmxlIChjcmV3IGxpc3QpDQpjcmV3X3RpYmJsZSA8LSBodG1sX3RhYmxlKHRhYmxlX2h0bWxbMl0pDQpjcmV3X3RpYmJsZQ0KDQpgYGANCmBgYHtyfQ0KIyBEaXNwbGF5IGNsZWFuZWQgY2FzdCBsaXN0DQpjYXN0X3RpYmJsZQ0KDQpgYGANCg0KYGBge3J9DQojIERpc3BsYXkgY2xlYW5lZCBjcmV3IGxpc3QNCmNyZXdfdGliYmxlDQoNCmBgYA0KDQpgYGB7cn0NCiMgU2NyYXBlIHRoZSAzcmQgdGFibGUgKFNlcmllcyBDYXN0KQ0Kc2VyaWVzX2Nhc3RfdGliYmxlIDwtIGh0bWxfdGFibGUodGFibGVfaHRtbFszXSkgICMgRXh0cmFjdCAzcmQgdGFibGUNCg0KIyBEaXNwbGF5IHRoZSB0YWJsZQ0KcHJpbnQoc2VyaWVzX2Nhc3RfdGliYmxlKQ0KDQojIEdldCBkaW1lbnNpb25zIG9mIHRoZSB0YWJsZSAoUm93cywgQ29sdW1ucykNCmNhc3RfZGltIDwtIGRpbShzZXJpZXNfY2FzdF90aWJibGUpDQpwcmludChwYXN0ZSgiUm93czoiLCBjYXN0X2RpbVsxXSwgIkNvbHVtbnM6IiwgY2FzdF9kaW1bMl0pKQ0KDQpgYGANCg0KDQoNCg0K