URLs
This builds the IMDb search URLs for the top feature films of each decade:
# First year of each decade, 1950 through 2000 in steps of ten.
decade <- seq(1950, 2000, by = 10)
# One feature-film search URL per decade; the release_date parameter is
# "<first year>,<last year>", where the last year is the first year plus 9.
urls <- paste0(
  "http://www.imdb.com/search/title/?release_date=",
  paste(decade, decade + 9, sep = ","),
  "&title_type=feature"
)
urls
## [1] "http://www.imdb.com/search/title/?release_date=1950,1959&title_type=feature"
## [2] "http://www.imdb.com/search/title/?release_date=1960,1969&title_type=feature"
## [3] "http://www.imdb.com/search/title/?release_date=1970,1979&title_type=feature"
## [4] "http://www.imdb.com/search/title/?release_date=1980,1989&title_type=feature"
## [5] "http://www.imdb.com/search/title/?release_date=1990,1999&title_type=feature"
## [6] "http://www.imdb.com/search/title/?release_date=2000,2009&title_type=feature"
This gets the links to the top 50 movies of a single decade:
# Download and parse the search-results page for the fifth URL (1990-1999).
page <- read_html(urls[5])
# Collect the href attribute of every <a> nested inside an element with
# class "mode-advanced".
links <- html_attr(html_nodes(page, ".mode-advanced a"), "href")
# Flag hrefs that begin with "/title" and end in "tt"; in the sample output
# these are the title links ending in "?ref_=adv_li_tt".
isMovie <- grepl("^/title.*tt$", links)
# Keep the flagged hrefs and prefix the site root to make them absolute URLs.
movieLinks <- paste0("http://www.imdb.com", links[isMovie])
movieLinks[1:10]
## [1] "http://www.imdb.com/title/tt0111161/?ref_=adv_li_tt"
## [2] "http://www.imdb.com/title/tt0169547/?ref_=adv_li_tt"
## [3] "http://www.imdb.com/title/tt0110357/?ref_=adv_li_tt"
## [4] "http://www.imdb.com/title/tt0110912/?ref_=adv_li_tt"
## [5] "http://www.imdb.com/title/tt0119654/?ref_=adv_li_tt"
## [6] "http://www.imdb.com/title/tt0112642/?ref_=adv_li_tt"
## [7] "http://www.imdb.com/title/tt0120338/?ref_=adv_li_tt"
## [8] "http://www.imdb.com/title/tt0103874/?ref_=adv_li_tt"
## [9] "http://www.imdb.com/title/tt0137523/?ref_=adv_li_tt"
## [10] "http://www.imdb.com/title/tt0102926/?ref_=adv_li_tt"
This gets all of the characters in The Shawshank Redemption who have links for their names:
# Download and parse the page behind the first movie link.
shawshank <- read_html(movieLinks[1])
# Text of every <a> inside a <td class="character"> cell, i.e. only the
# character names that are rendered as links.
characters <- html_text(html_nodes(shawshank, "td.character a"))
characters
## [1] "Andy Dufresne" "Ellis Boyd 'Red' Redding"
## [3] "Warden Norton" "Heywood"
## [5] "Captain Hadley" "Tommy"
## [7] "Bogs Diamond" "Brooks Hatlen"
## [9] "Skeet" "Floyd"
Try Shae
# Names to plot, each paired by position with a reference year stored as
# year_movie (presumably the release year of a movie featuring that name --
# TODO confirm the year/name pairing).
movieName <- c("Mia", "Ariel")
ym <- data.frame(
  year_movie = c(1994, 1984),
  name = movieName,
  # Keep name as character on R < 4.0 so the join key is not a factor.
  stringsAsFactors = FALSE
)
# Plot the yearly total count for each name, one facet per name, with a
# vertical line at that name's year_movie.
BabyNames %>%
  filter(name %in% movieName) %>%
  # Join on name explicitly instead of relying on automatic detection of
  # shared columns; this attaches year_movie to every row of the name.
  inner_join(ym, by = "name") %>%
  # year_movie is constant within a name; grouping by it only carries the
  # column through summarise().
  group_by(name, year, year_movie) %>%
  # Collapse multiple rows per name and year into a single total
  # (e.g. one row per sex -- TODO confirm against the BabyNames schema).
  summarise(total = sum(count)) %>%
  ggplot(aes(x = year, y = total)) +
  geom_line() +
  geom_vline(aes(xintercept = year_movie)) +
  facet_wrap(~ name)