#Install packages

#install.packages('rvest'); install.packages('dplyr'); install.packages('ggplot2')

Load packages

#Loading the ggplot2, rvest and dplyr packages
library('ggplot2')
library('rvest')
library('dplyr')
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#IMDb search listing requested with count=100, release_date=2016,2016 and title_type=feature
url <- "http://www.imdb.com/search/title?count=100&release_date=2016,2016&title_type=feature"

#Download and parse the page once; every selector below queries this document
webpage <- url %>% read_html()
#Select the nodes holding the rankings
rank_data_html <- webpage %>% html_nodes(".text-primary")

#Keep only the text of those nodes
rank_data <- rank_data_html %>% html_text()

#Peek at the raw rankings (strings with a trailing dot)
rank_data %>% head()
## [1] "1." "2." "3." "4." "5." "6."
#Data-Preprocessing: turn the ranking strings into numbers
rank_data <- rank_data %>% as.numeric()

#Peek at the converted rankings
rank_data %>% head()
## [1] 1 2 3 4 5 6
#Select the link inside each list-item header; its text is the movie title
title_data_html <- webpage %>% html_nodes(".lister-item-header a")

#Keep only the text of those nodes
title_data <- title_data_html %>% html_text()

#Peek at the first titles
title_data %>% head()
## [1] "Suicide Squad"     "The Conjuring 2"   "Captain Fantastic"
## [4] "Sing"              "Deadpool"          "Hidden Figures"
#Select the muted-text node that directly follows each ratings bar (the description)
description_data_html <- webpage %>% html_nodes(".ratings-bar+ .text-muted")

#Keep only the text of those nodes
description_data <- description_data_html %>% html_text()

#Peek at the raw descriptions (each still starts with "\n")
description_data %>% head()
## [1] "\nA secret government agency recruits some of the most dangerous incarcerated super-villains to form a defensive task force. Their first mission: save the world from the apocalypse."                                                             
## [2] "\nEd and Lorraine Warren travel to North London to help a single mother raising four children alone in a house plagued by a supernatural spirit."                                                                                                  
## [3] "\nIn the forests of the Pacific Northwest, a father devoted to raising his six kids with a rigorous physical and intellectual education is forced to leave his paradise and enter the world, challenging his idea of what it means to be a parent."
## [4] "\nIn a city of humanoid animals, a hustling theater impresario's attempt to save his theater with a singing competition becomes grander than he anticipates even as its finalists find that their lives will never be the same."                   
## [5] "\nA wisecracking mercenary gets experimented on and becomes immortal but ugly, and sets out to track down the man who ruined his looks."                                                                                                           
## [6] "\nThe story of a team of female African-American mathematicians who served a vital role in NASA during the early years of the U.S. space program."
#Data-Preprocessing: drop every newline character from the descriptions
description_data <- description_data %>% gsub("\n", "", .)

#Peek at the cleaned descriptions
description_data %>% head()
## [1] "A secret government agency recruits some of the most dangerous incarcerated super-villains to form a defensive task force. Their first mission: save the world from the apocalypse."                                                             
## [2] "Ed and Lorraine Warren travel to North London to help a single mother raising four children alone in a house plagued by a supernatural spirit."                                                                                                  
## [3] "In the forests of the Pacific Northwest, a father devoted to raising his six kids with a rigorous physical and intellectual education is forced to leave his paradise and enter the world, challenging his idea of what it means to be a parent."
## [4] "In a city of humanoid animals, a hustling theater impresario's attempt to save his theater with a singing competition becomes grander than he anticipates even as its finalists find that their lives will never be the same."                   
## [5] "A wisecracking mercenary gets experimented on and becomes immortal but ugly, and sets out to track down the man who ruined his looks."                                                                                                           
## [6] "The story of a team of female African-American mathematicians who served a vital role in NASA during the early years of the U.S. space program."
#Select the runtime nodes
runtime_data_html <- webpage %>% html_nodes(".text-muted .runtime")

#Keep only the text of those nodes
runtime_data <- runtime_data_html %>% html_text()

#Peek at the raw runtimes
runtime_data %>% head()
## [1] "123 min" "134 min" "118 min" "108 min" "108 min" "127 min"
#Data-Preprocessing: strip the " min" suffix, then convert to numbers

runtime_data <- as.numeric(gsub(" min", "", runtime_data))

#Peek at the numeric runtimes
runtime_data %>% head()
## [1] 123 134 118 108 108 127
#Select the genre nodes
genre_data_html <- webpage %>% html_nodes(".genre")

#Keep only the text of those nodes
genre_data <- genre_data_html %>% html_text()

#Peek at the raw genre strings
genre_data %>% head()
## [1] "\nAction, Adventure, Fantasy            "
## [2] "\nHorror, Mystery, Thriller            " 
## [3] "\nComedy, Drama            "             
## [4] "\nAnimation, Comedy, Family            " 
## [5] "\nAction, Adventure, Comedy            " 
## [6] "\nBiography, Drama, History            "
#Data-Preprocessing: remove the newline, then every space, then everything
#from the first comma onwards so only the first listed genre is kept
genre_data <- genre_data %>%
  gsub("\n", "", .) %>%
  gsub(" ", "", .) %>%
  gsub(",.*", "", .)

#Converting each genre from text to factor
genre_data <- genre_data %>% as.factor()

#Peek at the cleaned genres
genre_data %>% head()
## [1] Action    Horror    Comedy    Animation Action    Biography
## Levels: Action Adventure Animation Biography Comedy Crime Drama Horror
#Select the <strong> node inside each IMDb rating element
rating_data_html <- webpage %>% html_nodes(".ratings-imdb-rating strong")

#Keep only the text of those nodes
rating_data <- rating_data_html %>% html_text()

#Peek at the raw ratings
rating_data %>% head()
## [1] "5.9" "7.3" "7.9" "7.1" "8.0" "7.8"
#Data-Preprocessing: turn the rating strings into numbers
rating_data <- rating_data %>% as.numeric()

#Peek at the numeric ratings
rating_data %>% head()
## [1] 5.9 7.3 7.9 7.1 8.0 7.8
#Select the second span of each visible vote-count element
votes_data_html <- webpage %>% html_nodes(".sort-num_votes-visible span:nth-child(2)")

#Keep only the text of those nodes
votes_data <- votes_data_html %>% html_text()

#Peek at the raw vote counts
votes_data %>% head()
## [1] "622,860" "239,793" "199,932" "138,682" "928,720" "208,173"
#Data-Preprocessing: drop the thousands separators
votes_data <- votes_data %>% gsub(",", "", .)

#Data-Preprocessing: turn the vote strings into numbers
votes_data <- votes_data %>% as.numeric()

#Peek at the numeric vote counts
votes_data %>% head()
## [1] 622860 239793 199932 138682 928720 208173
#Select the first link of the paragraph that follows the muted text (the director)
directors_data_html <- webpage %>% html_nodes(".text-muted+ p a:nth-child(1)")

#Keep only the text of those nodes
directors_data <- directors_data_html %>% html_text()

#Peek at the first directors
directors_data %>% head()
## [1] "David Ayer"     "James Wan"      "Matt Ross"      "Garth Jennings"
## [5] "Tim Miller"     "Theodore Melfi"
#Data-Preprocessing: store the directors as a factor
directors_data <- directors_data %>% as.factor()

#Select the link that directly follows the ".ghost" separator (the first listed actor)
actors_data_html <- webpage %>% html_nodes(".lister-item-content .ghost+ a")

#Keep only the text of those nodes
actors_data <- actors_data_html %>% html_text()

#Peek at the first actors
actors_data %>% head()
## [1] "Will Smith"          "Vera Farmiga"        "Viggo Mortensen"    
## [4] "Matthew McConaughey" "Ryan Reynolds"       "Taraji P. Henson"
#Select the metascore nodes
metascore_data_html <- webpage %>% html_nodes(".metascore")

#Keep only the text of those nodes
metascore_data <- metascore_data_html %>% html_text()

#Peek at the raw metascores (padded with trailing spaces)
metascore_data %>% head()
## [1] "40        " "65        " "72        " "59        " "65        "
## [6] "74        "
#Data-Preprocessing: remove every space from the metascore strings
metascore_data <- metascore_data %>% gsub(" ", "", .)

#How many metascores were scraped?
metascore_data %>% length()
## [1] 96

34,64,82,88

#Filling missing entries with NA
#The indices are ascending, so each one is also the final position of the
#inserted gap (presumably found by inspecting the page - confirm before
#reusing on another scrape).
for (i in c(78, 81, 82, 88)) {
  #Insert a real missing value in front of the current i-th element.
  #append(..., after = ) keeps metascore_data a character vector instead of
  #turning it into a list with a "NA" string, and it stays correct at the
  #boundaries (i == 1 or i == length + 1), where 1:(i-1) / i:length(x)
  #would count backwards and duplicate elements.
  metascore_data <- append(metascore_data, NA_character_, after = i - 1)
}

#Data-Preprocessing: converting metascore to numerical
#(the gaps are already NA, so no coercion warnings are raised here)
metascore_data <- as.numeric(metascore_data)
#Length after the gaps were filled in

metascore_data %>% length()
## [1] 100
#Summary statistics of the numeric metascores
metascore_data %>% summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   23.00   46.75   59.50   59.15   72.00   99.00       4
##scrape the gross revenue section
gross_data_html <- html_nodes(webpage, ".ghost~ .text-muted+ span")

##convert gross revenue to text
gross_data <- html_text(gross_data_html)

#Data-Preprocessing: removing '$' and 'M' signs
#Strip the symbols by pattern rather than by character position: the earlier
#substring(gross_data, 2, 6) kept at most five characters and therefore cut
#off the hundredths digit of every gross of $100M or more.
#NOTE: printed values further down that end in ".x0" for large grosses were
#produced by that truncation and will change when the document is re-run.
gross_data <- gsub("[$M]", "", gross_data)

#Let's check the length of gross data
length(gross_data)
## [1] 89

34,64,82,87,88,89,96

#Peek at the cleaned gross strings, then re-check how many there are
gross_data %>% head()
## [1] "325.1" "102.4" "5.88"  "270.4" "363.0" "169.6"
gross_data %>% length()
## [1] 89
#Filling missing entries with NA
#The indices are ascending, so each one is also the final position of the
#inserted gap (presumably found by inspecting the page - confirm before
#reusing on another scrape).
for (i in c(34, 50, 55, 60, 62, 63, 82, 87, 88, 89, 96)) {
  #Insert a real missing value in front of the current i-th element.
  #append(..., after = ) keeps gross_data a character vector instead of
  #turning it into a list with a "NA" string, and it stays correct at the
  #boundaries (i == 1 or i == length + 1), where 1:(i-1) / i:length(x)
  #would count backwards and duplicate elements.
  gross_data <- append(gross_data, NA_character_, after = i - 1)
}

#Data-Preprocessing: converting gross to numerical
#(the gaps are already NA, so no coercion warnings are raised here)
gross_data <- as.numeric(gross_data)
#Peek at the first numeric gross values
gross_data %>% head()
## [1] 325.10 102.40   5.88 270.40 363.00 169.60

34,64,82,88

#Length of the gross data after the gaps were filled in
gross_data %>% length()
## [1] 100
#Assemble the scraped vectors into one data frame, one row per movie
movies_df <- data.frame(
  Rank = rank_data,
  Title = title_data,
  Description = description_data,
  Runtime = runtime_data,
  Genre = genre_data,
  Rating = rating_data,
  Metascore = metascore_data,
  Votes = votes_data,
  Gross_Earning_in_Mil = gross_data,
  Director = directors_data,
  Actor = actors_data
)

#Inspect column types and the first values of each column
movies_df %>% str()
## 'data.frame':    100 obs. of  11 variables:
##  $ Rank                : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ Title               : chr  "Suicide Squad" "The Conjuring 2" "Captain Fantastic" "Sing" ...
##  $ Description         : chr  "A secret government agency recruits some of the most dangerous incarcerated super-villains to form a defensive "| __truncated__ "Ed and Lorraine Warren travel to North London to help a single mother raising four children alone in a house pl"| __truncated__ "In the forests of the Pacific Northwest, a father devoted to raising his six kids with a rigorous physical and "| __truncated__ "In a city of humanoid animals, a hustling theater impresario's attempt to save his theater with a singing compe"| __truncated__ ...
##  $ Runtime             : num  123 134 118 108 108 127 107 117 132 115 ...
##  $ Genre               : Factor w/ 8 levels "Action","Adventure",..: 1 8 5 3 1 4 3 8 1 1 ...
##  $ Rating              : num  5.9 7.3 7.9 7.1 8 7.8 7.6 7.3 6.9 7.5 ...
##  $ Metascore           : num  40 65 72 59 65 74 81 62 54 72 ...
##  $ Votes               : num  622860 239793 199932 138682 928720 ...
##  $ Gross_Earning_in_Mil: num  325.1 102.4 5.88 270.4 363 ...
##  $ Director            : Factor w/ 99 levels "Alex Proyas",..: 23 42 59 35 95 93 83 56 8 87 ...
##  $ Actor               : chr  "Will Smith" "Vera Farmiga" "Viggo Mortensen" "Matthew McConaughey" ...

Make a visualization of the dataset

#Histogram of runtimes in 30 bins, with the bars filled by genre.
#Built with ggplot() directly because qplot() is deprecated as of ggplot2 3.4.0;
#with only an x variable qplot() drew this same histogram.
ggplot(movies_df, aes(x = Runtime, fill = Genre)) +
  geom_histogram(bins = 30)

Question 1: Based on the above data, which movie from which Genre had the longest runtime?

#Movies running longer than 150 minutes, longest first
movies_df %>%
  filter(Runtime > 150) %>%
  arrange(desc(Runtime))
##   Rank                              Title
## 1   45                     American Honey
## 2   42                            Silence
## 3   64                             Dangal
## 4   62                        The Wailing
## 5   19 Batman v Superman: Dawn of Justice
##                                                                                                                                                                                                          Description
## 1 A teenage girl with nothing to lose joins a traveling magazine sales crew, and gets caught up in a whirlwind of hard partying, law bending and young love as she criss-crosses the Midwest with a band of misfits.
## 2                                  In the 17th century, two Portuguese Jesuit priests travel to Japan in an attempt to locate their mentor, who is rumored to have committed apostasy, and to propagate Catholicism.
## 3                                                           Former wrestler Mahavir Singh Phogat and his two wrestler daughters struggle towards glory at the Commonwealth Games in the face of societal oppression.
## 4                     Soon after a stranger arrives in a little village, a mysterious sickness starts spreading. A policeman, drawn into the incident, is forced to solve the mystery in order to save his daughter.
## 5                                                      Fearing that the actions of Superman are left unchecked, Batman takes on the Man of Steel, while the world wrestles with what kind of a hero it really needs.
##   Runtime     Genre Rating Metascore  Votes Gross_Earning_in_Mil
## 1     163 Adventure    7.0        58  39831                 0.66
## 2     161     Drama    7.2        77 103420                 7.10
## 3     161    Action    8.4        23 165861                12.39
## 4     156    Horror    7.5        46  56393                   NA
## 5     152    Action    6.4        44 653780               330.30
##          Director           Actor
## 1   Andrea Arnold      Sasha Lane
## 2 Martin Scorsese Andrew Garfield
## 3   Nitesh Tiwari      Aamir Khan
## 4     Na Hong-jin    Jun Kunimura
## 5     Zack Snyder     Ben Affleck

American Honey, from the Adventure genre, had the longest runtime at 163 minutes.

Question 2: Based on the above data, among movies with a runtime of 130-160 mins, which genre has the most votes?

#Movies whose runtime lies strictly between 130 and 160 minutes, most-voted first
movies_df %>%
  filter(Runtime > 130, Runtime < 160) %>%
  arrange(desc(Votes))
##    Rank                                   Title
## 1    13              Captain America: Civil War
## 2    19      Batman v Superman: Dawn of Justice
## 3    23            Rogue One: A Star Wars Story
## 4    14                           Hacksaw Ridge
## 5    29 Fantastic Beasts and Where to Find Them
## 6    32                       X-Men: Apocalypse
## 7    36                   Manchester by the Sea
## 8     2                         The Conjuring 2
## 9     9                   The Magnificent Seven
## 10   69                                 Snowden
## 11   31                                13 Hours
## 12   21                          The Handmaiden
## 13   77                      The Lost City of Z
## 14   85                             Miss Sloane
## 15   62                             The Wailing
##                                                                                                                                                                                                                                    Description
## 1                                                                                                                                           Political involvement in the Avengers' affairs causes a rift between Captain America and Iron Man.
## 2                                                                                Fearing that the actions of Superman are left unchecked, Batman takes on the Man of Steel, while the world wrestles with what kind of a hero it really needs.
## 3                                                                              In a time of conflict, a group of unlikely heroes band together on a mission to steal the plans to the Death Star, the Empire's ultimate weapon of destruction.
## 4                         World War II American Army Medic Desmond T. Doss, who served during the Battle of Okinawa, refuses to kill people and becomes the first man in American history to receive the Medal of Honor without firing a shot.
## 5                                                                                    The adventures of writer Newt Scamander in New York's secret community of witches and wizards seventy years before Harry Potter reads his book in school.
## 6                                                                                            In the 1980s the X-Men must defeat an ancient all-powerful mutant, En Sabah Nur, who intends to thrive through bringing destruction to the world.
## 7                                                                                                                                                   A depressed uncle is asked to take care of his teenage nephew after the boy's father dies.
## 8                                                                                               Ed and Lorraine Warren travel to North London to help a single mother raising four children alone in a house plagued by a supernatural spirit.
## 9                                                                          Seven gunmen from a variety of backgrounds are brought together by a vengeful young widow to protect her town from the private army of a destructive industrialist.
## 10                                             The NSA's illegal surveillance techniques are leaked to the public by one of the agency's employees, Edward Snowden, in the form of thousands of classified documents distributed to the press.
## 11                                                                                                                                     During an attack on a U.S. compound in Libya, a security team struggles to make sense out of the chaos.
## 12                                                                                                                              A woman is hired as a handmaiden to a Japanese heiress, but secretly she is involved in a plot to defraud her.
## 13                                                                                 A true-life drama, centering on British explorer Major Percival Fawcett, who disappeared whilst searching for a mysterious city in the Amazon in the 1920s.
## 14 In the high-stakes world of political power-brokers, Elizabeth Sloane is the most sought after and formidable lobbyist in D.C. But when taking on the most powerful opponent of her career, she finds winning may come at too high a price.
## 15                                              Soon after a stranger arrives in a little village, a mysterious sickness starts spreading. A policeman, drawn into the incident, is forced to solve the mystery in order to save his daughter.
##    Runtime     Genre Rating Metascore  Votes Gross_Earning_in_Mil
## 1      147    Action    7.8        75 690610               408.00
## 2      152    Action    6.4        44 653780               330.30
## 3      133    Action    7.8        65 570655               532.10
## 4      139 Biography    8.1        71 455150                67.21
## 5      132 Adventure    7.3        66 421236               234.00
## 6      144    Action    6.9        52 403231               155.40
## 7      137     Drama    7.8        57 256003                47.70
## 8      134    Horror    7.3        65 239793               102.40
## 9      132    Action    6.9        54 189414                93.43
## 10     134 Biography    7.3        49 142467                21.59
## 11     144    Action    7.3        48 124860                52.85
## 12     145     Drama    8.1        84 122558                 2.01
## 13     141 Biography    6.6        39  83230                 8.58
## 14     132     Drama    7.5        64  67371                 3.44
## 15     156    Horror    7.5        46  56393                   NA
##            Director                Actor
## 1     Anthony Russo          Chris Evans
## 2       Zack Snyder          Ben Affleck
## 3    Gareth Edwards       Felicity Jones
## 4        Mel Gibson      Andrew Garfield
## 5       David Yates       Eddie Redmayne
## 6      Bryan Singer         James McAvoy
## 7  Kenneth Lonergan        Casey Affleck
## 8         James Wan         Vera Farmiga
## 9     Antoine Fuqua    Denzel Washington
## 10     Oliver Stone Joseph Gordon-Levitt
## 11      Michael Bay       John Krasinski
## 12   Park Chan-Wook          Kim Min-hee
## 13       James Gray       Charlie Hunnam
## 14      John Madden     Jessica Chastain
## 15      Na Hong-jin         Jun Kunimura
#Total votes per genre, most-voted genre first.
#Question 2 only concerns movies with a runtime between 130 and 160 minutes,
#so the same runtime filter as in the listing above is applied before summing;
#without it the totals cover every movie regardless of runtime.
top_votes <- movies_df %>%
  filter(Runtime > 130 & Runtime < 160) %>%
  group_by(Genre) %>%
  summarise(most_votes = sum(Votes, na.rm = TRUE)) %>%
  arrange(desc(most_votes))

top_votes
## # A tibble: 5 x 2
##   Genre     most_votes
##   <fct>          <dbl>
## 1 Action       2632550
## 2 Biography     680847
## 3 Drama         445932
## 4 Adventure     421236
## 5 Horror        296186

Within the 130-160 minute runtime window, Action movies had 2632550 votes, which is the highest of all the genres.

Use a scatter plot to look deeper into the gross earnings

#Gross earnings against runtime; point size follows Rating, point colour follows Genre
ggplot(movies_df, aes(x = Runtime, y = Gross_Earning_in_Mil)) +
  geom_point(aes(size = Rating, colour = Genre))
## Warning: Removed 11 rows containing missing values (geom_point).

Question 3: Based on the above data, which genre has the highest average gross earnings among movies with a runtime of 100 to 120 mins?

#Movies whose runtime lies strictly between 100 and 120 minutes

movies_df %>%
  filter(Runtime > 100, Runtime < 120)
##    Rank                                            Title
## 1     3                                Captain Fantastic
## 2     4                                             Sing
## 3     5                                         Deadpool
## 4     7                                            Moana
## 5     8                                            Split
## 6    10                                   Doctor Strange
## 7    11                                Nocturnal Animals
## 8    12                                          Arrival
## 9    16                                    The Nice Guys
## 10   17                                         Zootopia
## 11   18                                    Me Before You
## 12   20                                       Passengers
## 13   22                                   Train to Busan
## 14   26                                       Your Name.
## 15   30                                   The Neon Demon
## 16   33                                        Moonlight
## 17   34                              The Invisible Guest
## 18   35                            The Girl on the Train
## 19   38                             The Legend of Tarzan
## 20   40                                         War Dogs
## 21   41                               Hell or High Water
## 22   43                                  The Jungle Book
## 23   44                                         Triple 9
## 24   51                        Hunt for the Wilderpeople
## 25   53                              10 Cloverfield Lane
## 26   54                                      The Founder
## 27   56                    Ghostbusters: Answer the Call
## 28   57                                    The Bad Batch
## 29   58                                     The 5th Wave
## 30   59                                 How to Be Single
## 31   61                             Central Intelligence
## 32   65                                             Lion
## 33   70                      Jack Reacher: Never Go Back
## 34   71                 Resident Evil: The Final Chapter
## 35   74                                 Assassin's Creed
## 36   79                                         Why Him?
## 37   80                         The Purge: Election Year
## 38   83                            The Edge of Seventeen
## 39   86                                Deepwater Horizon
## 40   90                                    Hail, Caesar!
## 41   92                       The Huntsman: Winter's War
## 42   93                                       The Choice
## 43   94 Teenage Mutant Ninja Turtles: Out of the Shadows
## 44   95                                   The Great Wall
## 45   98                                         Criminal
##                                                                                                                                                                                                                                         Description
## 1  In the forests of the Pacific Northwest, a father devoted to raising his six kids with a rigorous physical and intellectual education is forced to leave his paradise and enter the world, challenging his idea of what it means to be a parent.
## 2                     In a city of humanoid animals, a hustling theater impresario's attempt to save his theater with a singing competition becomes grander than he anticipates even as its finalists find that their lives will never be the same.
## 3                                                                                                             A wisecracking mercenary gets experimented on and becomes immortal but ugly, and sets out to track down the man who ruined his looks.
## 4                                                                        In Ancient Polynesia, when a terrible curse incurred by the Demigod Maui reaches Moana's island, she answers the Ocean's call to seek out the Demigod to set things right.
## 5                                                                                     Three girls are kidnapped by a man with a diagnosed 23 distinct personalities. They must try to escape before the apparent emergence of a frightful new 24th.
## 6                                                                                                                        While on a journey of physical and spiritual healing, a brilliant neurosurgeon is drawn into the world of the mystic arts.
## 7                                                                                                                   A wealthy art gallery owner is haunted by her ex-husband's novel, a violent thriller she interprets as a symbolic revenge tale.
## 8                                                                                                                A linguist works with the military to communicate with alien lifeforms after twelve mysterious spacecraft appear around the world.
## 9                                                                                                                       In 1970s Los Angeles, a mismatched pair of private eyes investigate a missing girl and the mysterious death of a porn star.
## 10                                                                                                                In a city of anthropomorphic animals, a rookie bunny cop and a cynical con artist fox must work together to uncover a conspiracy.
## 11                                                                                                                                                A girl in a small town forms an unlikely bond with a recently-paralyzed man she's taking care of.
## 12                                                                                                                         A malfunction in a sleeping pod on a spacecraft traveling to a distant colony planet wakes one passenger 90 years early.
## 13                                                                                                                                 While a zombie virus breaks out in South Korea, passengers struggle to survive on the train from Seoul to Busan.
## 14                                                                                                              Two strangers find themselves linked in a bizarre way. When a connection forms, will distance be the only thing to keep them apart?
## 15                                                      An aspiring model, Jesse, is new to Los Angeles. However, her beauty and youth, which generate intense fascination and jealousy within the fashion industry, may prove themselves sinister.
## 16                                                                             A young African-American man grapples with his identity and sexuality while experiencing the everyday struggles of childhood, adolescence, and burgeoning adulthood.
## 17                                                                                                  A successful entrepreneur accused of murder and a witness preparation expert have less than three hours to come up with an impregnable defense.
## 18                                                                                                                            A divorcee becomes entangled in a missing persons investigation that promises to send shockwaves throughout her life.
## 19                                                                                               Tarzan, having acclimated to life in London, is called back to his former home in the jungle to investigate the activities at a mining encampment.
## 20                                                   Loosely based on the true story of two young men, David Packouz and Efraim Diveroli, who won a three hundred million dollar contract from the Pentagon to arm America's allies in Afghanistan.
## 21                                                                                                                 A divorced father and his ex-con older brother resort to a desperate scheme in order to save their family's ranch in West Texas.
## 22                                            After a threat from the tiger Shere Khan forces him to flee the jungle, a man-cub named Mowgli embarks on a journey of self discovery with the help of panther Bagheera and free-spirited bear Baloo.
## 23                                                                                                               A gang of criminals and corrupt cops plan the murder of a police officer in order to pull off their biggest heist yet across town.
## 24                                                                                                                             A national manhunt is ordered for a rebellious kid and his foster uncle who go missing in the wild New Zealand bush.
## 25                                                                                            After getting in a car accident, a woman is held in a shelter by a man who claims that the outside world is affected by a widespread chemical attack.
## 26                            The story of Ray Kroc, a salesman who turned two brothers' innovative fast food eatery, McDonald's, into the biggest restaurant business in the world, with a combination of ambition, persistence, and ruthlessness.
## 27                                    Following a ghost invasion of Manhattan, paranormal enthusiasts Erin Gilbert and Abby Yates, nuclear engineer Jillian Holtzmann, and subway worker Patty Tolan band together to stop the otherworldly threat.
## 28                                                                                                                                                                                   In a desert dystopia, a young woman is kidnapped by cannibals.
## 29                                                                                           Four waves of increasingly deadly alien attacks have left most of Earth in ruin. Cassie is on the run, desperately trying to save her younger brother.
## 30                                                                                                                                                                        A group of young adults navigate love and relationships in New York City.
## 31                                                                                        After he reconnects with an awkward pal from high school through Facebook, a mild-mannered accountant is lured into the world of international espionage.
## 32                                                                          A five-year-old Indian boy is adopted by an Australian couple after getting lost hundreds of kilometers from home. 25 years later, he sets out to find his lost family.
## 33                                                                                                 Jack Reacher must uncover the truth behind a major government conspiracy in order to clear his name while on the run as a fugitive from the law.
## 34                                          Alice returns to where the nightmare began: The Hive in Raccoon City, where the Umbrella Corporation is gathering its forces for a final strike against the only remaining survivors of the apocalypse.
## 35                                                                                      Callum Lynch explores the memories of his ancestor Aguilar de Nerha and gains the skills of a Master Assassin, before taking on the secret Templar society.
## 36                                                                               A holiday gathering threatens to go off the rails when Ned Fleming realizes that his daughter's Silicon Valley millionaire boyfriend is about to pop the question.
## 37                                                               Former Police Sergeant Barnes becomes head of security for Senator Charlie Roan, a Presidential candidate targeted for death on Purge night due to her vow to eliminate the Purge.
## 38                                                                                                                             High-school life gets even more unbearable for Nadine when her best friend, Krista, starts dating her older brother.
## 39                                                                       A dramatization of the disaster in April 2010, when the offshore drilling rig called the Deepwater Horizon exploded, resulting in the worst oil spill in American history.
## 40                                                                                                                                                                         A Hollywood fixer in the 1950s works to keep the studio's stars in line.
## 41                                              Eric and fellow warrior Sara, raised as members of ice Queen Freya's army, try to conceal their forbidden love as they fight to survive the wicked intentions of both Freya and her sister Ravenna.
## 42                                                                                                    Travis and Gabby first meet as neighbors in a small coastal town and wind up in a relationship that is tested by life's most defining events.
## 43                                                                                 The Turtles get into another battle with their enemy the Shredder, who has acquired new allies: the mutant thugs Bebop and Rocksteady and the alien being Krang.
## 44                                                                            In ancient China, a group of European mercenaries encounters a secret army that maintains and defends the Great Wall of China against a horde of monstrous creatures.
## 45                                             In a last-ditch effort to stop a diabolical plot, a dead CIA operative's memories, secrets, and skills are implanted into a death-row inmate in hopes that he will complete the operative's mission.
##    Runtime     Genre Rating Metascore  Votes Gross_Earning_in_Mil
## 1      118    Comedy    7.9        72 199932                 5.88
## 2      108 Animation    7.1        59 138682               270.40
## 3      108    Action    8.0        65 928720               363.00
## 4      107 Animation    7.6        81 286674               248.70
## 5      117    Horror    7.3        62 443608               138.20
## 6      115    Action    7.5        72 624349               232.60
## 7      116     Drama    7.5        67 247602                10.64
## 8      116     Drama    7.9        81 618155               100.50
## 9      116    Action    7.4        70 292572                36.26
## 10     108 Animation    8.0        78 448202               341.20
## 11     106     Drama    7.4        51 217804                56.25
## 12     116     Drama    7.0        41 362475               100.00
## 13     118    Action    7.6        72 179142                 2.13
## 14     106 Animation    8.4        79 213278                 5.02
## 15     117    Horror    6.2        51  87143                 1.33
## 16     111     Drama    7.4        99 283284                27.85
## 17     106     Crime    8.1        48 155037                   NA
## 18     112     Crime    6.5        96 175021                75.40
## 19     110    Action    6.2        32 167223               126.60
## 20     114 Biography    7.1        88 187336                43.03
## 21     102    Action    7.6        79 210211                26.86
## 22     106 Adventure    7.4        52 263236               364.00
## 23     115    Action    6.3        80  68712                12.63
## 24     101 Adventure    7.9        32 116946                 5.20
## 25     103    Action    7.2        66 298769                72.08
## 26     115 Biography    7.2        42 133066                12.79
## 27     117    Action    6.7        62 208626               128.30
## 28     118    Action    5.3        33  27911                 0.18
## 29     112    Action    5.2        51 102927                34.92
## 30     110    Comedy    6.1        67  82665                46.84
## 31     107    Action    6.3        81 165058               127.40
## 32     118 Biography    8.0        77 220953                51.74
## 33     118    Action    6.1        23 143253                58.70
## 34     107    Action    5.5        59  86673                26.83
## 35     115    Action    5.7        60 189275                54.65
## 36     111    Comedy    6.2        55 104106                60.32
## 37     108    Action    6.0        49  92285                79.21
## 38     104    Comedy    7.3        77 109128                14.43
## 39     107    Action    7.1        68 156855                61.43
## 40     106    Comedy    6.3        72 126802                30.08
## 41     114    Action    6.1        35 100537                48.39
## 42     111     Drama    6.6        26  36789                18.71
## 43     112    Action    6.0        40  86384                82.05
## 44     103    Action    5.9        42 127716                45.54
## 45     113    Action    6.3        36  61406                14.27
##                   Director                Actor
## 1                Matt Ross      Viggo Mortensen
## 2           Garth Jennings  Matthew McConaughey
## 3               Tim Miller        Ryan Reynolds
## 4             Ron Clements      Auli'i Cravalho
## 5       M. Night Shyamalan         James McAvoy
## 6         Scott Derrickson Benedict Cumberbatch
## 7                 Tom Ford            Amy Adams
## 8         Denis Villeneuve            Amy Adams
## 9              Shane Black        Russell Crowe
## 10            Byron Howard     Ginnifer Goodwin
## 11           Thea Sharrock        Emilia Clarke
## 12           Morten Tyldum    Jennifer Lawrence
## 13            Sang-ho Yeon             Gong Yoo
## 14          Makoto Shinkai     Ryûnosuke Kamiki
## 15    Nicolas Winding Refn         Elle Fanning
## 16           Barry Jenkins       Mahershala Ali
## 17             Oriol Paulo          Mario Casas
## 18             Tate Taylor          Emily Blunt
## 19             David Yates  Alexander Skarsgård
## 20           Todd Phillips           Jonah Hill
## 21         David Mackenzie           Chris Pine
## 22             Jon Favreau           Neel Sethi
## 23           John Hillcoat        Casey Affleck
## 24           Taika Waititi            Sam Neill
## 25        Dan Trachtenberg         John Goodman
## 26        John Lee Hancock       Michael Keaton
## 27               Paul Feig     Melissa McCarthy
## 28       Ana Lily Amirpour      Suki Waterhouse
## 29              J Blakeson   Chloë Grace Moretz
## 30        Christian Ditter       Dakota Johnson
## 31 Rawson Marshall Thurber       Dwayne Johnson
## 32             Garth Davis            Dev Patel
## 33            Edward Zwick           Tom Cruise
## 34      Paul W.S. Anderson       Milla Jovovich
## 35           Justin Kurzel   Michael Fassbender
## 36            John Hamburg          Zoey Deutch
## 37          James DeMonaco         Frank Grillo
## 38      Kelly Fremon Craig     Hailee Steinfeld
## 39              Peter Berg        Mark Wahlberg
## 40              Ethan Coen          Josh Brolin
## 41   Cedric Nicolas-Troyan      Chris Hemsworth
## 42               Ross Katz      Benjamin Walker
## 43              Dave Green            Megan Fox
## 44             Yimou Zhang           Matt Damon
## 45            Ariel Vromen        Kevin Costner
# Average gross earnings (in millions) for each genre.
# Titles with a missing gross figure are left out of their genre's mean.
avggross <- movies_df %>%
  group_by(Genre) %>%
  summarise(average_gross = mean(Gross_Earning_in_Mil, na.rm = TRUE))

# Rank the genres from highest to lowest average gross.
avggross <- avggross %>%
  arrange(desc(average_gross))

avggross
## # A tibble: 8 x 2
##   Genre     average_gross
##   <fct>             <dbl>
## 1 Animation         208. 
## 2 Adventure         138. 
## 3 Action            107. 
## 4 Horror             80.6
## 5 Biography          62.4
## 6 Crime              57.3
## 7 Comedy             50.7
## 8 Drama              36.4

Among movies with a runtime of 100 to 120 minutes, the Animation genre had the highest average gross earnings, at $208.4M.