Advice for Shae Henry

Homer White

2017-11-13

URLs

This builds the IMDb search URL for the top feature films of each decade:

# First year of each decade from the 1950s through the 2000s.
decade <- seq(from = 1950, to = 2000, by = 10)
# "start,end" release-date range for each decade, e.g. "1950,1959".
range_spec <- paste(decade, decade + 9, sep = ",")
# One feature-film search URL per decade.
urls <- paste0(
  "http://www.imdb.com/search/title/?release_date=",
  range_spec,
  "&title_type=feature"
)
urls
## [1] "http://www.imdb.com/search/title/?release_date=1950,1959&title_type=feature"
## [2] "http://www.imdb.com/search/title/?release_date=1960,1969&title_type=feature"
## [3] "http://www.imdb.com/search/title/?release_date=1970,1979&title_type=feature"
## [4] "http://www.imdb.com/search/title/?release_date=1980,1989&title_type=feature"
## [5] "http://www.imdb.com/search/title/?release_date=1990,1999&title_type=feature"
## [6] "http://www.imdb.com/search/title/?release_date=2000,2009&title_type=feature"

This gets the links to the top 50 movies of a single decade (here the fifth URL, the 1990s):

# Download and parse the search-results page for the fifth decade URL.
page <- read_html(urls[5])
# Pull the href attribute of every anchor matched by the CSS selector.
anchors <- html_nodes(page, ".mode-advanced a")
links <- html_attr(anchors, "href")
# Keep only hrefs that start with "/title" and end in "tt".
isMovie <- grepl("^/title.*tt$", links)
# Turn the site-relative paths into absolute URLs.
movieLinks <- paste0("http://www.imdb.com", links[isMovie])
# Show the first ten.
movieLinks[1:10]
##  [1] "http://www.imdb.com/title/tt0111161/?ref_=adv_li_tt"
##  [2] "http://www.imdb.com/title/tt0169547/?ref_=adv_li_tt"
##  [3] "http://www.imdb.com/title/tt0110357/?ref_=adv_li_tt"
##  [4] "http://www.imdb.com/title/tt0110912/?ref_=adv_li_tt"
##  [5] "http://www.imdb.com/title/tt0119654/?ref_=adv_li_tt"
##  [6] "http://www.imdb.com/title/tt0112642/?ref_=adv_li_tt"
##  [7] "http://www.imdb.com/title/tt0120338/?ref_=adv_li_tt"
##  [8] "http://www.imdb.com/title/tt0103874/?ref_=adv_li_tt"
##  [9] "http://www.imdb.com/title/tt0137523/?ref_=adv_li_tt"
## [10] "http://www.imdb.com/title/tt0102926/?ref_=adv_li_tt"

This gets all of the characters in The Shawshank Redemption who have links for their names:

# Download and parse the page behind the first movie link.
shawshank <- read_html(movieLinks[1])
# Anchors inside character cells, i.e. only characters whose names are links.
character_nodes <- html_nodes(shawshank, "td.character a")
# Keep the visible text of each anchor.
characters <- html_text(character_nodes)
characters
##  [1] "Andy Dufresne"            "Ellis Boyd 'Red' Redding"
##  [3] "Warden Norton"            "Heywood"                 
##  [5] "Captain Hadley"           "Tommy"                   
##  [7] "Bogs Diamond"             "Brooks Hatlen"           
##  [9] "Skeet"                    "Floyd"

Try Shae

# Names to track, paired position-by-position with a movie release year.
movieName <- c("Mia", "Ariel")
# stringsAsFactors = FALSE keeps `name` a character column on R < 4.0 as well,
# so the join key below is never a factor.
ym <- data.frame(year_movie = c(1994, 1984),
                 name = movieName,
                 stringsAsFactors = FALSE)
BabyNames %>%
  filter(name %in% movieName) %>%
  # Name the key explicitly instead of relying on the implicit natural join.
  inner_join(ym, by = "name") %>%
  # One total per name and year (year_movie is constant within a name).
  group_by(name, year, year_movie) %>%
  summarise(total = sum(count)) %>%
  # Drop the grouping that summarise() leaves behind before plotting.
  ungroup() %>%
  ggplot(aes(x = year, y = total)) +
  geom_line() +
  # Take the marker from `ym` so each facet gets exactly one vertical line
  # rather than one overplotted line per summarised row.
  geom_vline(data = ym, aes(xintercept = year_movie)) +
  facet_wrap(~ name)