#Install packages
#install.packages('rvest')
#install.packages('dplyr')
#install.packages('ggplot2')
Load packages
#Loading the ggplot2, rvest and dplyr packages
library('ggplot2')
library('rvest')
library('dplyr')
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Target page: IMDb title search for feature films released in 2016, 100 per page
url <- 'http://www.imdb.com/search/title?count=100&release_date=2016,2016&title_type=feature'
#Download and parse the page's HTML once so it can be queried with CSS selectors
webpage <- url %>% read_html()
#Select the nodes carrying each movie's rank via the '.text-primary' selector
rank_data_html <- webpage %>% html_nodes('.text-primary')
#Extract the text content of the matched nodes
rank_data <- rank_data_html %>% html_text()
#Preview the raw rank strings
head(rank_data)
## [1] "1." "2." "3." "4." "5." "6."
#Data-Preprocessing: coerce the rank strings to numbers
rank_data <- rank_data %>% as.numeric()
#Preview the numeric ranks
head(rank_data)
## [1] 1 2 3 4 5 6
#Select the title links inside each list item's header
title_data_html <- webpage %>% html_nodes('.lister-item-header a')
#Extract the title text from the matched links
title_data <- title_data_html %>% html_text()
#Preview the titles
head(title_data)
## [1] "Suicide Squad" "The Conjuring 2" "Captain Fantastic"
## [4] "Sing" "Deadpool" "Hidden Figures"
#Select the description nodes with the '.ratings-bar+ .text-muted' selector
description_data_html <- webpage %>% html_nodes('.ratings-bar+ .text-muted')
#Extract the text content of the matched nodes
description_data <- description_data_html %>% html_text()
#Preview the raw descriptions
head(description_data)
## [1] "\nA secret government agency recruits some of the most dangerous incarcerated super-villains to form a defensive task force. Their first mission: save the world from the apocalypse."
## [2] "\nEd and Lorraine Warren travel to North London to help a single mother raising four children alone in a house plagued by a supernatural spirit."
## [3] "\nIn the forests of the Pacific Northwest, a father devoted to raising his six kids with a rigorous physical and intellectual education is forced to leave his paradise and enter the world, challenging his idea of what it means to be a parent."
## [4] "\nIn a city of humanoid animals, a hustling theater impresario's attempt to save his theater with a singing competition becomes grander than he anticipates even as its finalists find that their lives will never be the same."
## [5] "\nA wisecracking mercenary gets experimented on and becomes immortal but ugly, and sets out to track down the man who ruined his looks."
## [6] "\nThe story of a team of female African-American mathematicians who served a vital role in NASA during the early years of the U.S. space program."
#Data-Preprocessing: strip the newline characters from every description
description_data <- description_data %>% gsub("\n", "", .)
#Preview the cleaned descriptions
head(description_data)
## [1] "A secret government agency recruits some of the most dangerous incarcerated super-villains to form a defensive task force. Their first mission: save the world from the apocalypse."
## [2] "Ed and Lorraine Warren travel to North London to help a single mother raising four children alone in a house plagued by a supernatural spirit."
## [3] "In the forests of the Pacific Northwest, a father devoted to raising his six kids with a rigorous physical and intellectual education is forced to leave his paradise and enter the world, challenging his idea of what it means to be a parent."
## [4] "In a city of humanoid animals, a hustling theater impresario's attempt to save his theater with a singing competition becomes grander than he anticipates even as its finalists find that their lives will never be the same."
## [5] "A wisecracking mercenary gets experimented on and becomes immortal but ugly, and sets out to track down the man who ruined his looks."
## [6] "The story of a team of female African-American mathematicians who served a vital role in NASA during the early years of the U.S. space program."
#Select the runtime nodes with the '.text-muted .runtime' selector
runtime_data_html <- webpage %>% html_nodes('.text-muted .runtime')
#Extract the text content of the matched nodes
runtime_data <- runtime_data_html %>% html_text()
#Preview the raw runtime strings
head(runtime_data)
## [1] "123 min" "134 min" "118 min" "108 min" "108 min" "127 min"
#Data-Preprocessing: drop the " min" suffix, then coerce to numbers
runtime_data <- as.numeric(gsub(" min", "", runtime_data))
#Preview the numeric runtimes
head(runtime_data)
## [1] 123 134 118 108 108 127
#Select the genre nodes with the '.genre' selector
genre_data_html <- webpage %>% html_nodes('.genre')
#Extract the text content of the matched nodes
genre_data <- genre_data_html %>% html_text()
#Preview the raw genre strings
head(genre_data)
## [1] "\nAction, Adventure, Fantasy "
## [2] "\nHorror, Mystery, Thriller "
## [3] "\nComedy, Drama "
## [4] "\nAnimation, Comedy, Family "
## [5] "\nAction, Adventure, Comedy "
## [6] "\nBiography, Drama, History "
#Data-Preprocessing, in order: remove newlines, remove spaces, keep only the
#first genre (drop everything from the first comma on), convert to a factor
genre_data <- genre_data %>%
  gsub("\n", "", .) %>%
  gsub(" ", "", .) %>%
  gsub(",.*", "", .) %>%
  as.factor()
#Preview the cleaned genre factor
head(genre_data)
## [1] Action Horror Comedy Animation Action Biography
## Levels: Action Adventure Animation Biography Comedy Crime Drama Horror
#Select the IMDB rating nodes with the '.ratings-imdb-rating strong' selector
rating_data_html <- webpage %>% html_nodes('.ratings-imdb-rating strong')
#Extract the text content of the matched nodes
rating_data <- rating_data_html %>% html_text()
#Preview the raw rating strings
head(rating_data)
## [1] "5.9" "7.3" "7.9" "7.1" "8.0" "7.8"
#Data-Preprocessing: coerce the rating strings to numbers
rating_data <- rating_data %>% as.numeric()
#Preview the numeric ratings
head(rating_data)
## [1] 5.9 7.3 7.9 7.1 8.0 7.8
#Select the vote-count nodes: the second child span of the votes container
votes_data_html <- webpage %>% html_nodes('.sort-num_votes-visible span:nth-child(2)')
#Extract the text content of the matched nodes
votes_data <- votes_data_html %>% html_text()
#Preview the raw vote strings
head(votes_data)
## [1] "622,860" "239,793" "199,932" "138,682" "928,720" "208,173"
#Data-Preprocessing: remove the thousands separators, then coerce to numbers
votes_data <- as.numeric(gsub(",", "", votes_data))
#Preview the numeric vote counts
head(votes_data)
## [1] 622860 239793 199932 138682 928720 208173
#Select the director links with the '.text-muted+ p a:nth-child(1)' selector
directors_data_html <- webpage %>% html_nodes('.text-muted+ p a:nth-child(1)')
#Extract the text content of the matched links
directors_data <- directors_data_html %>% html_text()
#Preview the director names
head(directors_data)
## [1] "David Ayer" "James Wan" "Matt Ross" "Garth Jennings"
## [5] "Tim Miller" "Theodore Melfi"
#Data-Preprocessing: store the director names as a factor
directors_data <- directors_data %>% as.factor()
#Select the actor links with the '.lister-item-content .ghost+ a' selector
actors_data_html <- webpage %>% html_nodes('.lister-item-content .ghost+ a')
#Extract the text content of the matched links
actors_data <- actors_data_html %>% html_text()
#Preview the actor names
head(actors_data)
## [1] "Will Smith" "Vera Farmiga" "Viggo Mortensen"
## [4] "Matthew McConaughey" "Ryan Reynolds" "Taraji P. Henson"
#Using CSS selectors to scrape the metascore section
#Not every movie has a metascore, so a page-wide html_nodes() call returns
#fewer values than there are movies, and the gaps then have to be padded by
#hand at hard-coded positions that go stale whenever the listing changes.
#Selecting one container per movie and calling html_node() (singular) on each
#yields exactly one result per container, with a missing node where the score
#is absent, so the values stay aligned with the other per-movie vectors.
#NOTE(review): assumes '.lister-item-content' matches once per movie - confirm
movie_items_html <- html_nodes(webpage,'.lister-item-content')
metascore_data_html <- html_node(movie_items_html,'.metascore')
#Converting the metascore data to text (a missing node becomes NA)
metascore_data <- html_text(metascore_data_html)
#Let's have a look at the metascore data
head(metascore_data)
## [1] "40 " "65 " "72 " "59 " "65 "
## [6] "74 "
#Data-Preprocessing: removing extra space in metascore
metascore_data<-gsub(" ","",metascore_data)
#Data-Preprocessing: converting metascore to numerical
metascore_data<-as.numeric(metascore_data)
#Fail loudly if the metascores do not line up one-to-one with the ranks
stopifnot(length(metascore_data) == length(rank_data))
#Let's have a look at the length of the metascore data
length(metascore_data)
## [1] 100
#Let's look at summary statistics
summary(metascore_data)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 23.00 46.75 59.50 59.15 72.00 99.00 4
##scrape the gross revenue section
#Some movies list no gross figure, so a page-wide html_nodes() call returns
#fewer values than there are movies. Instead of padding the gaps at
#hard-coded positions, select one container per movie and call html_node()
#(singular) on each: it yields one result per container, with a missing node
#where the figure is absent, keeping the values aligned with the other columns.
#NOTE(review): assumes '.lister-item-content' matches once per movie - confirm
gross_data_html <- html_node(html_nodes(webpage,'.lister-item-content'),'.ghost~ .text-muted+ span')
##convert gross revenue to text (a missing node becomes NA)
gross_data <- html_text(gross_data_html)
#Data-Preprocessing: removing '$' and 'M' signs
#The symbols are removed by pattern rather than by character position:
#substring(x,2,6) kept at most five characters after the '$', so any figure
#with six or more characters lost its trailing digits.
gross_data<-gsub("[$M]","",gross_data)
#Let's check the length of gross data
length(gross_data)
head(gross_data)
#Data-Preprocessing: converting gross to numerical
gross_data<-as.numeric(gross_data)
#Fail loudly if the gross figures do not line up one-to-one with the ranks
stopifnot(length(gross_data) == length(rank_data))
#Let's have another look at the gross data and its length
head(gross_data)
length(gross_data)
#NOTE: the console output previously recorded in this section came from the
#manual padding approach and is omitted here; re-run to regenerate it.
#Assemble the scraped vectors into a single data frame, one column per field
movies_df <- data.frame(
  Rank = rank_data,
  Title = title_data,
  Description = description_data,
  Runtime = runtime_data,
  Genre = genre_data,
  Rating = rating_data,
  Metascore = metascore_data,
  Votes = votes_data,
  Gross_Earning_in_Mil = gross_data,
  Director = directors_data,
  Actor = actors_data
)
#Inspect the column types and leading values of the data frame
str(movies_df)
## 'data.frame': 100 obs. of 11 variables:
## $ Rank : num 1 2 3 4 5 6 7 8 9 10 ...
## $ Title : chr "Suicide Squad" "The Conjuring 2" "Captain Fantastic" "Sing" ...
## $ Description : chr "A secret government agency recruits some of the most dangerous incarcerated super-villains to form a defensive "| __truncated__ "Ed and Lorraine Warren travel to North London to help a single mother raising four children alone in a house pl"| __truncated__ "In the forests of the Pacific Northwest, a father devoted to raising his six kids with a rigorous physical and "| __truncated__ "In a city of humanoid animals, a hustling theater impresario's attempt to save his theater with a singing compe"| __truncated__ ...
## $ Runtime : num 123 134 118 108 108 127 107 117 132 115 ...
## $ Genre : Factor w/ 8 levels "Action","Adventure",..: 1 8 5 3 1 4 3 8 1 1 ...
## $ Rating : num 5.9 7.3 7.9 7.1 8 7.8 7.6 7.3 6.9 7.5 ...
## $ Metascore : num 40 65 72 59 65 74 81 62 54 72 ...
## $ Votes : num 622860 239793 199932 138682 928720 ...
## $ Gross_Earning_in_Mil: num 325.1 102.4 5.88 270.4 363 ...
## $ Director : Factor w/ 99 levels "Alex Proyas",..: 23 42 59 35 95 93 83 56 8 87 ...
## $ Actor : chr "Will Smith" "Vera Farmiga" "Viggo Mortensen" "Matthew McConaughey" ...
Make a visualization of the dataset
#Histogram of movie runtimes in 30 bins, with bars filled by genre.
#Written as ggplot() + geom_histogram() because qplot() is deprecated in
#current ggplot2 releases; the resulting plot is the same.
ggplot(movies_df, aes(x = Runtime, fill = Genre)) +
  geom_histogram(bins = 30)
Question 1: Based on the above data, which movie from which Genre had the longest runtime?
#List the movies running longer than 150 minutes, longest first
movies_df %>%
  filter(Runtime > 150) %>%
  arrange(desc(Runtime))
## Rank Title
## 1 45 American Honey
## 2 42 Silence
## 3 64 Dangal
## 4 62 The Wailing
## 5 19 Batman v Superman: Dawn of Justice
## Description
## 1 A teenage girl with nothing to lose joins a traveling magazine sales crew, and gets caught up in a whirlwind of hard partying, law bending and young love as she criss-crosses the Midwest with a band of misfits.
## 2 In the 17th century, two Portuguese Jesuit priests travel to Japan in an attempt to locate their mentor, who is rumored to have committed apostasy, and to propagate Catholicism.
## 3 Former wrestler Mahavir Singh Phogat and his two wrestler daughters struggle towards glory at the Commonwealth Games in the face of societal oppression.
## 4 Soon after a stranger arrives in a little village, a mysterious sickness starts spreading. A policeman, drawn into the incident, is forced to solve the mystery in order to save his daughter.
## 5 Fearing that the actions of Superman are left unchecked, Batman takes on the Man of Steel, while the world wrestles with what kind of a hero it really needs.
## Runtime Genre Rating Metascore Votes Gross_Earning_in_Mil
## 1 163 Adventure 7.0 58 39831 0.66
## 2 161 Drama 7.2 77 103420 7.10
## 3 161 Action 8.4 23 165861 12.39
## 4 156 Horror 7.5 46 56393 NA
## 5 152 Action 6.4 44 653780 330.30
## Director Actor
## 1 Andrea Arnold Sasha Lane
## 2 Martin Scorsese Andrew Garfield
## 3 Nitesh Tiwari Aamir Khan
## 4 Na Hong-jin Jun Kunimura
## 5 Zack Snyder Ben Affleck
American Honey, listed under the Adventure genre, had the longest runtime at 163 minutes.
Question 2: Based on the above data, in the Runtime of 130-160 mins, which genre has the highest votes?
# Movies with a runtime strictly between 130 and 160 minutes, most-voted first
movies_df %>%
  filter(Runtime > 130, Runtime < 160) %>%
  arrange(desc(Votes))
## Rank Title
## 1 13 Captain America: Civil War
## 2 19 Batman v Superman: Dawn of Justice
## 3 23 Rogue One: A Star Wars Story
## 4 14 Hacksaw Ridge
## 5 29 Fantastic Beasts and Where to Find Them
## 6 32 X-Men: Apocalypse
## 7 36 Manchester by the Sea
## 8 2 The Conjuring 2
## 9 9 The Magnificent Seven
## 10 69 Snowden
## 11 31 13 Hours
## 12 21 The Handmaiden
## 13 77 The Lost City of Z
## 14 85 Miss Sloane
## 15 62 The Wailing
## Description
## 1 Political involvement in the Avengers' affairs causes a rift between Captain America and Iron Man.
## 2 Fearing that the actions of Superman are left unchecked, Batman takes on the Man of Steel, while the world wrestles with what kind of a hero it really needs.
## 3 In a time of conflict, a group of unlikely heroes band together on a mission to steal the plans to the Death Star, the Empire's ultimate weapon of destruction.
## 4 World War II American Army Medic Desmond T. Doss, who served during the Battle of Okinawa, refuses to kill people and becomes the first man in American history to receive the Medal of Honor without firing a shot.
## 5 The adventures of writer Newt Scamander in New York's secret community of witches and wizards seventy years before Harry Potter reads his book in school.
## 6 In the 1980s the X-Men must defeat an ancient all-powerful mutant, En Sabah Nur, who intends to thrive through bringing destruction to the world.
## 7 A depressed uncle is asked to take care of his teenage nephew after the boy's father dies.
## 8 Ed and Lorraine Warren travel to North London to help a single mother raising four children alone in a house plagued by a supernatural spirit.
## 9 Seven gunmen from a variety of backgrounds are brought together by a vengeful young widow to protect her town from the private army of a destructive industrialist.
## 10 The NSA's illegal surveillance techniques are leaked to the public by one of the agency's employees, Edward Snowden, in the form of thousands of classified documents distributed to the press.
## 11 During an attack on a U.S. compound in Libya, a security team struggles to make sense out of the chaos.
## 12 A woman is hired as a handmaiden to a Japanese heiress, but secretly she is involved in a plot to defraud her.
## 13 A true-life drama, centering on British explorer Major Percival Fawcett, who disappeared whilst searching for a mysterious city in the Amazon in the 1920s.
## 14 In the high-stakes world of political power-brokers, Elizabeth Sloane is the most sought after and formidable lobbyist in D.C. But when taking on the most powerful opponent of her career, she finds winning may come at too high a price.
## 15 Soon after a stranger arrives in a little village, a mysterious sickness starts spreading. A policeman, drawn into the incident, is forced to solve the mystery in order to save his daughter.
## Runtime Genre Rating Metascore Votes Gross_Earning_in_Mil
## 1 147 Action 7.8 75 690610 408.00
## 2 152 Action 6.4 44 653780 330.30
## 3 133 Action 7.8 65 570655 532.10
## 4 139 Biography 8.1 71 455150 67.21
## 5 132 Adventure 7.3 66 421236 234.00
## 6 144 Action 6.9 52 403231 155.40
## 7 137 Drama 7.8 57 256003 47.70
## 8 134 Horror 7.3 65 239793 102.40
## 9 132 Action 6.9 54 189414 93.43
## 10 134 Biography 7.3 49 142467 21.59
## 11 144 Action 7.3 48 124860 52.85
## 12 145 Drama 8.1 84 122558 2.01
## 13 141 Biography 6.6 39 83230 8.58
## 14 132 Drama 7.5 64 67371 3.44
## 15 156 Horror 7.5 46 56393 NA
## Director Actor
## 1 Anthony Russo Chris Evans
## 2 Zack Snyder Ben Affleck
## 3 Gareth Edwards Felicity Jones
## 4 Mel Gibson Andrew Garfield
## 5 David Yates Eddie Redmayne
## 6 Bryan Singer James McAvoy
## 7 Kenneth Lonergan Casey Affleck
## 8 James Wan Vera Farmiga
## 9 Antoine Fuqua Denzel Washington
## 10 Oliver Stone Joseph Gordon-Levitt
## 11 Michael Bay John Krasinski
## 12 Park Chan-Wook Kim Min-hee
## 13 James Gray Charlie Hunnam
## 14 John Madden Jessica Chastain
## 15 Na Hong-jin Jun Kunimura
# find the top votes
# Total the votes per genre for Question 2, which asks only about movies with
# a runtime between 130 and 160 minutes. The runtime filter is applied inside
# this pipeline; summarising movies_df directly would total the votes of all
# movies regardless of runtime.
# NOTE: console output recorded from the unfiltered version must be
# regenerated by re-running this block.
top_votes <- movies_df %>%
  filter(Runtime > 130 & Runtime < 160) %>%
  group_by(Genre) %>%
  summarise(most_votes = sum(Votes, na.rm = TRUE)) %>%
  arrange(desc(most_votes))
top_votes
## # A tibble: 8 x 2
## Genre most_votes
## <fct> <dbl>
## 1 Action 10066813
## 2 Drama 2602737
## 3 Animation 1706794
## 4 Biography 1677095
## 5 Comedy 1306174
## 6 Horror 1040972
## 7 Adventure 1001073
## 8 Crime 648132
Action movies received 10066813 votes, the highest total of all the genres.
Use a plot graph to look deeper into the gross earnings
#Scatter plot of gross earnings against runtime; point size is mapped to the
#rating and point colour to the genre
ggplot(movies_df, aes(x = Runtime, y = Gross_Earning_in_Mil)) +
  geom_point(aes(size = Rating, colour = Genre))
## Warning: Removed 11 rows containing missing values (geom_point).
Question 3: Based on the above data, which genre has the highest average gross earnings among movies with a runtime of 100 to 120 minutes?
# Movies with a runtime strictly between 100 and 120 minutes
movies_df %>%
  filter(Runtime > 100, Runtime < 120)
## Rank Title
## 1 3 Captain Fantastic
## 2 4 Sing
## 3 5 Deadpool
## 4 7 Moana
## 5 8 Split
## 6 10 Doctor Strange
## 7 11 Nocturnal Animals
## 8 12 Arrival
## 9 16 The Nice Guys
## 10 17 Zootopia
## 11 18 Me Before You
## 12 20 Passengers
## 13 22 Train to Busan
## 14 26 Your Name.
## 15 30 The Neon Demon
## 16 33 Moonlight
## 17 34 The Invisible Guest
## 18 35 The Girl on the Train
## 19 38 The Legend of Tarzan
## 20 40 War Dogs
## 21 41 Hell or High Water
## 22 43 The Jungle Book
## 23 44 Triple 9
## 24 51 Hunt for the Wilderpeople
## 25 53 10 Cloverfield Lane
## 26 54 The Founder
## 27 56 Ghostbusters: Answer the Call
## 28 57 The Bad Batch
## 29 58 The 5th Wave
## 30 59 How to Be Single
## 31 61 Central Intelligence
## 32 65 Lion
## 33 70 Jack Reacher: Never Go Back
## 34 71 Resident Evil: The Final Chapter
## 35 74 Assassin's Creed
## 36 79 Why Him?
## 37 80 The Purge: Election Year
## 38 83 The Edge of Seventeen
## 39 86 Deepwater Horizon
## 40 90 Hail, Caesar!
## 41 92 The Huntsman: Winter's War
## 42 93 The Choice
## 43 94 Teenage Mutant Ninja Turtles: Out of the Shadows
## 44 95 The Great Wall
## 45 98 Criminal
## Description
## 1 In the forests of the Pacific Northwest, a father devoted to raising his six kids with a rigorous physical and intellectual education is forced to leave his paradise and enter the world, challenging his idea of what it means to be a parent.
## 2 In a city of humanoid animals, a hustling theater impresario's attempt to save his theater with a singing competition becomes grander than he anticipates even as its finalists find that their lives will never be the same.
## 3 A wisecracking mercenary gets experimented on and becomes immortal but ugly, and sets out to track down the man who ruined his looks.
## 4 In Ancient Polynesia, when a terrible curse incurred by the Demigod Maui reaches Moana's island, she answers the Ocean's call to seek out the Demigod to set things right.
## 5 Three girls are kidnapped by a man with a diagnosed 23 distinct personalities. They must try to escape before the apparent emergence of a frightful new 24th.
## 6 While on a journey of physical and spiritual healing, a brilliant neurosurgeon is drawn into the world of the mystic arts.
## 7 A wealthy art gallery owner is haunted by her ex-husband's novel, a violent thriller she interprets as a symbolic revenge tale.
## 8 A linguist works with the military to communicate with alien lifeforms after twelve mysterious spacecraft appear around the world.
## 9 In 1970s Los Angeles, a mismatched pair of private eyes investigate a missing girl and the mysterious death of a porn star.
## 10 In a city of anthropomorphic animals, a rookie bunny cop and a cynical con artist fox must work together to uncover a conspiracy.
## 11 A girl in a small town forms an unlikely bond with a recently-paralyzed man she's taking care of.
## 12 A malfunction in a sleeping pod on a spacecraft traveling to a distant colony planet wakes one passenger 90 years early.
## 13 While a zombie virus breaks out in South Korea, passengers struggle to survive on the train from Seoul to Busan.
## 14 Two strangers find themselves linked in a bizarre way. When a connection forms, will distance be the only thing to keep them apart?
## 15 An aspiring model, Jesse, is new to Los Angeles. However, her beauty and youth, which generate intense fascination and jealousy within the fashion industry, may prove themselves sinister.
## 16 A young African-American man grapples with his identity and sexuality while experiencing the everyday struggles of childhood, adolescence, and burgeoning adulthood.
## 17 A successful entrepreneur accused of murder and a witness preparation expert have less than three hours to come up with an impregnable defense.
## 18 A divorcee becomes entangled in a missing persons investigation that promises to send shockwaves throughout her life.
## 19 Tarzan, having acclimated to life in London, is called back to his former home in the jungle to investigate the activities at a mining encampment.
## 20 Loosely based on the true story of two young men, David Packouz and Efraim Diveroli, who won a three hundred million dollar contract from the Pentagon to arm America's allies in Afghanistan.
## 21 A divorced father and his ex-con older brother resort to a desperate scheme in order to save their family's ranch in West Texas.
## 22 After a threat from the tiger Shere Khan forces him to flee the jungle, a man-cub named Mowgli embarks on a journey of self discovery with the help of panther Bagheera and free-spirited bear Baloo.
## 23 A gang of criminals and corrupt cops plan the murder of a police officer in order to pull off their biggest heist yet across town.
## 24 A national manhunt is ordered for a rebellious kid and his foster uncle who go missing in the wild New Zealand bush.
## 25 After getting in a car accident, a woman is held in a shelter by a man who claims that the outside world is affected by a widespread chemical attack.
## 26 The story of Ray Kroc, a salesman who turned two brothers' innovative fast food eatery, McDonald's, into the biggest restaurant business in the world, with a combination of ambition, persistence, and ruthlessness.
## 27 Following a ghost invasion of Manhattan, paranormal enthusiasts Erin Gilbert and Abby Yates, nuclear engineer Jillian Holtzmann, and subway worker Patty Tolan band together to stop the otherworldly threat.
## 28 In a desert dystopia, a young woman is kidnapped by cannibals.
## 29 Four waves of increasingly deadly alien attacks have left most of Earth in ruin. Cassie is on the run, desperately trying to save her younger brother.
## 30 A group of young adults navigate love and relationships in New York City.
## 31 After he reconnects with an awkward pal from high school through Facebook, a mild-mannered accountant is lured into the world of international espionage.
## 32 A five-year-old Indian boy is adopted by an Australian couple after getting lost hundreds of kilometers from home. 25 years later, he sets out to find his lost family.
## 33 Jack Reacher must uncover the truth behind a major government conspiracy in order to clear his name while on the run as a fugitive from the law.
## 34 Alice returns to where the nightmare began: The Hive in Raccoon City, where the Umbrella Corporation is gathering its forces for a final strike against the only remaining survivors of the apocalypse.
## 35 Callum Lynch explores the memories of his ancestor Aguilar de Nerha and gains the skills of a Master Assassin, before taking on the secret Templar society.
## 36 A holiday gathering threatens to go off the rails when Ned Fleming realizes that his daughter's Silicon Valley millionaire boyfriend is about to pop the question.
## 37 Former Police Sergeant Barnes becomes head of security for Senator Charlie Roan, a Presidential candidate targeted for death on Purge night due to her vow to eliminate the Purge.
## 38 High-school life gets even more unbearable for Nadine when her best friend, Krista, starts dating her older brother.
## 39 A dramatization of the disaster in April 2010, when the offshore drilling rig called the Deepwater Horizon exploded, resulting in the worst oil spill in American history.
## 40 A Hollywood fixer in the 1950s works to keep the studio's stars in line.
## 41 Eric and fellow warrior Sara, raised as members of ice Queen Freya's army, try to conceal their forbidden love as they fight to survive the wicked intentions of both Freya and her sister Ravenna.
## 42 Travis and Gabby first meet as neighbors in a small coastal town and wind up in a relationship that is tested by life's most defining events.
## 43 The Turtles get into another battle with their enemy the Shredder, who has acquired new allies: the mutant thugs Bebop and Rocksteady and the alien being Krang.
## 44 In ancient China, a group of European mercenaries encounters a secret army that maintains and defends the Great Wall of China against a horde of monstrous creatures.
## 45 In a last-ditch effort to stop a diabolical plot, a dead CIA operative's memories, secrets, and skills are implanted into a death-row inmate in hopes that he will complete the operative's mission.
## Runtime Genre Rating Metascore Votes Gross_Earning_in_Mil
## 1 118 Comedy 7.9 72 199932 5.88
## 2 108 Animation 7.1 59 138682 270.40
## 3 108 Action 8.0 65 928720 363.00
## 4 107 Animation 7.6 81 286674 248.70
## 5 117 Horror 7.3 62 443608 138.20
## 6 115 Action 7.5 72 624349 232.60
## 7 116 Drama 7.5 67 247602 10.64
## 8 116 Drama 7.9 81 618155 100.50
## 9 116 Action 7.4 70 292572 36.26
## 10 108 Animation 8.0 78 448202 341.20
## 11 106 Drama 7.4 51 217804 56.25
## 12 116 Drama 7.0 41 362475 100.00
## 13 118 Action 7.6 72 179142 2.13
## 14 106 Animation 8.4 79 213278 5.02
## 15 117 Horror 6.2 51 87143 1.33
## 16 111 Drama 7.4 99 283284 27.85
## 17 106 Crime 8.1 48 155037 NA
## 18 112 Crime 6.5 96 175021 75.40
## 19 110 Action 6.2 32 167223 126.60
## 20 114 Biography 7.1 88 187336 43.03
## 21 102 Action 7.6 79 210211 26.86
## 22 106 Adventure 7.4 52 263236 364.00
## 23 115 Action 6.3 80 68712 12.63
## 24 101 Adventure 7.9 32 116946 5.20
## 25 103 Action 7.2 66 298769 72.08
## 26 115 Biography 7.2 42 133066 12.79
## 27 117 Action 6.7 62 208626 128.30
## 28 118 Action 5.3 33 27911 0.18
## 29 112 Action 5.2 51 102927 34.92
## 30 110 Comedy 6.1 67 82665 46.84
## 31 107 Action 6.3 81 165058 127.40
## 32 118 Biography 8.0 77 220953 51.74
## 33 118 Action 6.1 23 143253 58.70
## 34 107 Action 5.5 59 86673 26.83
## 35 115 Action 5.7 60 189275 54.65
## 36 111 Comedy 6.2 55 104106 60.32
## 37 108 Action 6.0 49 92285 79.21
## 38 104 Comedy 7.3 77 109128 14.43
## 39 107 Action 7.1 68 156855 61.43
## 40 106 Comedy 6.3 72 126802 30.08
## 41 114 Action 6.1 35 100537 48.39
## 42 111 Drama 6.6 26 36789 18.71
## 43 112 Action 6.0 40 86384 82.05
## 44 103 Action 5.9 42 127716 45.54
## 45 113 Action 6.3 36 61406 14.27
## Director Actor
## 1 Matt Ross Viggo Mortensen
## 2 Garth Jennings Matthew McConaughey
## 3 Tim Miller Ryan Reynolds
## 4 Ron Clements Auli'i Cravalho
## 5 M. Night Shyamalan James McAvoy
## 6 Scott Derrickson Benedict Cumberbatch
## 7 Tom Ford Amy Adams
## 8 Denis Villeneuve Amy Adams
## 9 Shane Black Russell Crowe
## 10 Byron Howard Ginnifer Goodwin
## 11 Thea Sharrock Emilia Clarke
## 12 Morten Tyldum Jennifer Lawrence
## 13 Sang-ho Yeon Gong Yoo
## 14 Makoto Shinkai Ryûnosuke Kamiki
## 15 Nicolas Winding Refn Elle Fanning
## 16 Barry Jenkins Mahershala Ali
## 17 Oriol Paulo Mario Casas
## 18 Tate Taylor Emily Blunt
## 19 David Yates Alexander Skarsgård
## 20 Todd Phillips Jonah Hill
## 21 David Mackenzie Chris Pine
## 22 Jon Favreau Neel Sethi
## 23 John Hillcoat Casey Affleck
## 24 Taika Waititi Sam Neill
## 25 Dan Trachtenberg John Goodman
## 26 John Lee Hancock Michael Keaton
## 27 Paul Feig Melissa McCarthy
## 28 Ana Lily Amirpour Suki Waterhouse
## 29 J Blakeson Chloë Grace Moretz
## 30 Christian Ditter Dakota Johnson
## 31 Rawson Marshall Thurber Dwayne Johnson
## 32 Garth Davis Dev Patel
## 33 Edward Zwick Tom Cruise
## 34 Paul W.S. Anderson Milla Jovovich
## 35 Justin Kurzel Michael Fassbender
## 36 John Hamburg Zoey Deutch
## 37 James DeMonaco Frank Grillo
## 38 Kelly Fremon Craig Hailee Steinfeld
## 39 Peter Berg Mark Wahlberg
## 40 Ethan Coen Josh Brolin
## 41 Cedric Nicolas-Troyan Chris Hemsworth
## 42 Ross Katz Benjamin Walker
## 43 Dave Green Megan Fox
## 44 Yimou Zhang Matt Damon
## 45 Ariel Vromen Kevin Costner
# Average gross earnings per genre for Question 3, which asks only about
# movies with a runtime between 100 and 120 minutes. The runtime filter is
# applied inside this pipeline; summarising movies_df directly would average
# over all movies regardless of runtime.
# NOTE: console output recorded from the unfiltered version must be
# regenerated by re-running this block.
avggross <- movies_df %>%
  filter(Runtime > 100 & Runtime < 120) %>%
  group_by(Genre) %>%
  summarise(average_gross = mean(Gross_Earning_in_Mil, na.rm = TRUE)) %>%
  arrange(desc(average_gross))
avggross
## # A tibble: 8 x 2
## Genre average_gross
## <fct> <dbl>
## 1 Animation 208.
## 2 Adventure 138.
## 3 Action 107.
## 4 Horror 80.6
## 5 Biography 62.4
## 6 Crime 57.3
## 7 Comedy 50.7
## 8 Drama 36.4
Animation had the highest average gross earnings, $208.4M, for runtimes from 100 to 120 minutes.