github: https://github.com/rweberc/Data607_Assignment9
# NOTE(review): reviewDf is not referenced anywhere in the visible code —
# confirm it is unused before removing it.
reviewDf <- NULL
# Fetch one page of NYT movie reviews flagged as Critics' Picks.
#
# Args:
#   offset: value appended to the request URL as the `offset` query parameter.
#
# Returns:
#   A data frame built from the `results` element of the parsed response,
#   holding the `display_title` and `publication_date` fields of each entry.
#
# Reads the global `api_key`. Raises an error when the HTTP request fails, so
# that an error response (which carries no `results`) is not silently turned
# into an empty page.
getReviews <- function(offset){
  # Create URL
  url <- str_c("https://api.nytimes.com/svc/movies/v2/reviews/search.json?critics-pick=Y&api-key=",
               api_key, "&offset=", offset)
  # Get content; fail loudly on a non-success status instead of parsing the
  # error body as if it were a page of reviews
  response <- GET(url)
  stop_for_status(response)
  reviewList <- content(response, as = "parsed")
  # Parse relevant fields to df. `[` is what magrittr's `extract` aliases;
  # using it directly avoids picking up tidyr::extract by attach order.
  reviewList$results %>% map_df(`[`, c('display_title', 'publication_date'))
}
# Request the Critics' Picks pages at offsets 340, 360, ..., 1400
titlesList <- seq(340, 1400, by = 20) %>%
  map(getReviews)
#jsonedit(titlesList)
# Stack the per-page data frames into a single data frame
titleDf <- bind_rows(titlesList)
# Parse publication_date with ymd() and inspect the range of dates retrieved
titleDf[["publication_date"]] <- ymd(titleDf[["publication_date"]])
summary(titleDf[["publication_date"]])
## Min. 1st Qu. Median Mean 3rd Qu.
## "2010-06-16" "2011-04-07" "2013-10-24" "2013-07-16" "2015-11-05"
## Max.
## "2016-06-09"
# Keep only reviews published in the years 2010 through 2015 (inclusive)
titleDf <- titleDf %>%
  filter(between(year(publication_date), 2010, 2015))
head(titleDf)
## # A tibble: 6 x 2
## display_title publication_date
## <chr> <date>
## 1 Anomalisa 2015-12-29
## 2 The Story of the Last Chrysanthemums 2015-12-24
## 3 Where to Invade Next 2015-12-22
## 4 45 Years 2015-12-22
## 5 He Never Died 2015-12-17
## 6 Winding Stream 2015-12-15
# Load the Oscar data set, keeping text columns as character vectors
oscarDf <- read.csv(
  "https://raw.githubusercontent.com/rweberc/Data607_Assignment9/master/database.csv",
  stringsAsFactors = FALSE
)
# Year: drop everything from the first "/" onward and convert to numeric,
# then keep 2010 through 2015 and only the columns of interest
oscarDf <- oscarDf %>%
  mutate(Year = as.numeric(str_replace(Year, "/.*", ""))) %>%
  filter(between(Year, 2010, 2015)) %>%
  select(Film, Name, Year, Award, Winner)
# Rows for the Foreign Language Film award only
foreignDf <- filter(oscarDf, Award %in% "Foreign Language Film")
head(foreignDf)
## Film Name Year Award Winner
## 1 Mexico Biutiful 2010 Foreign Language Film NA
## 2 Greece Dogtooth 2010 Foreign Language Film NA
## 3 Denmark In a Better World 2010 Foreign Language Film 1
## 4 Canada Incendies 2010 Foreign Language Film NA
## 5 Algeria Outside the Law (Hors-la-loi) 2010 Foreign Language Film NA
## 6 Belgium Bullhead 2011 Foreign Language Film NA
# Foreign Language Film rows whose Name exactly matches a Critics' Pick title
foreignCriticPickDf <- foreignDf %>%
  inner_join(titleDf, by = c(Name = "display_title"))
foreignCriticPickDf
## Film Name Year Award Winner publication_date
## 1 France Mustang 2015 Foreign Language Film NA 2015-11-19
## 2 Jordan Theeb 2015 Foreign Language Film NA 2015-11-05
Only 2 of the 30 Foreign Language Film nominees appear to have been Critic’s Picks.
# Keep only the rows flagged as winners
winnerDf <- filter(oscarDf, Winner == 1)
# The film title sits in the Film column for some awards and in the Name
# column for others, so match the Critics' Picks against each column in turn
# and stack the two sets of matches
winnerCriticPickDf1 <- winnerDf %>%
  inner_join(titleDf, by = c(Film = "display_title"))
winnerCriticPickDf2 <- winnerDf %>%
  inner_join(titleDf, by = c(Name = "display_title"))
winnerCriticPickDf <- bind_rows(winnerCriticPickDf1, winnerCriticPickDf2)
head(winnerCriticPickDf)
## Film
## 1 The Fighter
## 2 Black Swan
## 3 The Fighter
## 4 12 Years a Slave
## 5 Michael Sugar, Steve Golin, Nicole Rocklin and Blye Pagon Faust, Producers
## 6 Screenplay by Charles Randolph and Adam McKay
## Name Year Award Winner
## 1 Christian Bale 2010 Actor in a Supporting Role 1
## 2 Natalie Portman 2010 Actress in a Leading Role 1
## 3 Melissa Leo 2010 Actress in a Supporting Role 1
## 4 Lupita Nyong'o 2013 Actress in a Supporting Role 1
## 5 Spotlight 2015 Best Picture 1
## 6 The Big Short 2015 Writing (Adapted Screenplay) 1
## publication_date
## 1 2010-12-09
## 2 2010-12-02
## 3 2010-12-09
## 4 2013-10-17
## 5 2015-11-05
## 6 2015-12-10
Only 6 out of 74 Oscar winners overall were Critic’s Picks.
The low numbers make me wonder whether I missed an assumption. Some matches may have been lost because of special-character differences between the two datasets.
I am not sure whether this really indicates that the Oscars are missing important movies or that the Critics Picks are.
Comparing both lists against a more reputable international film award source could help give a sense of that.