Jesse Yang
Mar 1, 2017
library(dplyr)
library(rvest)
# Download and parse the IMDb title page (tt1490017) into an HTML document.
lego_movie <- read_html("http://www.imdb.com/title/tt1490017/")
# Select every <span> nested inside a <strong>, take its text content, and
# coerce that text to a number. Written as nested calls (innermost runs first)
# so no intermediate objects are left in the workspace.
rating <- as.numeric(
  html_text(
    html_nodes(lego_movie, "strong span")
  )
)
# Print the extracted value.
rating
[1] 7.8
library(httr)
library(xml2)
# Issue an HTTP GET request for the IMDb title page and keep the response
# object in `r`; the statements that follow inspect this same response.
r <- GET("http://www.imdb.com/title/tt1490017/")
# Print the HTTP status code of the response.
status_code(r)
[1] 200
# Print the response headers returned by the server for request `r`.
headers(r)
$date
[1] "Thu, 02 Mar 2017 19:24:26 GMT"
$server
[1] "Server"
$`x-frame-options`
[1] "SAMEORIGIN"
$`content-security-policy`
[1] "frame-ancestors 'self' imdb.com *.imdb.com *.media-imdb.com withoutabox.com *.withoutabox.com amazon.com *.amazon.com amazon.co.uk *.amazon.co.uk amazon.de *.amazon.de translate.google.com images.google.com www.google.com www.google.co.uk search.aol.com bing.com www.bing.com"
$`ad-unit`
[1] "imdb.title_md.title.maindetails"
$`entity-id`
[1] "tt1490017"
$`content-type`
[1] "text/html;charset=UTF-8"
$`content-language`
[1] "en-US"
$`content-encoding`
[1] "gzip"
$vary
[1] "Accept-Encoding,User-Agent"
$`set-cookie`
[1] "uu=BCYkgE3ZbeuB_e_YwaWey6PiOv8gagS5Iouf4A1y4u7VOp_i2qCk7imFouMvMJ8ERnrhwM6auEab%0D%0AMqx2u52D9S_SAFD0qICvkJJaKtCsnnQPvDfTUWwd_beRyVR7VZAzXBpX137JXbsChjVIRo2VfjM0%0D%0AT3r4ay7rPTo8a51gz8bJS2kGp4ov0unaLt3jv9Yv37bFN_2F_KctUPAYYIGBlrw34ppZ6Tf7EDN0%0D%0A5zJSculsOzETGkeZHeFLg_wraVbZnYH3rTeOhmQ0Wlw1R_vAlvCrVg%0D%0A; Domain=.imdb.com; Expires=Tue, 20-Mar-2085 22:38:34 GMT; Path=/"
$`set-cookie`
[1] "session-id=507-7012615-2686357; Domain=.imdb.com; Expires=Tue, 20-Mar-2085 22:38:34 GMT; Path=/"
$`set-cookie`
[1] "session-id-time=1646162666; Domain=.imdb.com; Expires=Tue, 20-Mar-2085 22:38:34 GMT; Path=/"
$p3p
[1] "policyref=\"http://i.imdb.com/images/p3p.xml\",CP=\"CAO DSP LAW CUR ADM IVAo IVDo CONo OTPo OUR DELi PUBi OTRi BUS PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA HEA PRE LOC GOV OTC \""
$`transfer-encoding`
[1] "chunked"
attr(,"class")
[1] "insensitive" "list"
# Inspect the structure of the parsed response body; the printed result shows
# what kind of object content() produced for this response.
str(content(r))
List of 2
$ node:<externalptr>
$ doc :<externalptr>
- attr(*, "class")= chr [1:2] "xml_document" "xml_node"
# Extract cast names from the parsed response body:
#   1. find the <a> elements directly inside <td class="itemprop"> cells that
#      sit anywhere under the element whose id is "titleCast";
#   2. take each link's text;
#   3. strip leading/trailing whitespace.
# The result is not assigned, so it is printed.
stringr::str_trim(
  xml_text(
    xml_find_all(
      content(r),
      '//*[@id="titleCast"]//td[@class="itemprop"]/a'
    )
  )
)
[1] "Will Arnett" "Elizabeth Banks" "Craig Berry"
[4] "Alison Brie" "David Burrows" "Anthony Daniels"
[7] "Charlie Day" "Amanda Farinos" "Keith Ferguson"
[10] "Will Ferrell" "Will Forte" "Dave Franco"
[13] "Morgan Freeman" "Todd Hansen" "Jonah Hill"