# Hello world: print a literal greeting, then store the same greeting in `a`
# and display the stored value.
print(x = "Hello World")
## [1] "Hello World"
a <- "Hello World"
print(a)
## [1] "Hello World"
# Loading the required packages
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages --------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rvest)
## Warning: package 'rvest' was built under R version 3.5.3
## Loading required package: xml2
## Warning: package 'xml2' was built under R version 3.5.3
##
## Attaching package: 'rvest'
## The following object is masked from 'package:purrr':
##
## pluck
## The following object is masked from 'package:readr':
##
## guess_encoding
# URL of the IMDb search-results page to be scraped
# (query parameters: count=100, genres=action, release_date=2018,2018, title_type=feature).
url <- "https://www.imdb.com/search/title/?count=100&genres=action&release_date=2018,2018&title_type=feature"
# Download the page and parse its HTML into a document the selectors below can query.
webpage <- read_html(url)
# Rank: The rank of the film from 1 to 100 on the list of 100 films released in 2018.
# Title: The title of the film.
# Description: The description of the film.
# Runtime: The duration of the film.
# Genre: The genre of the film.
# Rating: The IMDb rating of the film.
# Votes: The number of votes the film has received.
# Gross_Earning_in_Mil: The gross earnings of the film in millions.
#####################################################################
# Rank: start with the ranking field. The CSS selector '.text-primary' (found with
# SelectorGadget) matches the element that encloses each film's rank.
rank_data_html <- webpage %>% html_nodes(css = '.text-primary')
# Show the matched nodes
print(rank_data_html)
## {xml_nodeset (100)}
## [1] <span class="lister-item-index unbold text-primary">1.</span>
## [2] <span class="lister-item-index unbold text-primary">2.</span>
## [3] <span class="lister-item-index unbold text-primary">3.</span>
## [4] <span class="lister-item-index unbold text-primary">4.</span>
## [5] <span class="lister-item-index unbold text-primary">5.</span>
## [6] <span class="lister-item-index unbold text-primary">6.</span>
## [7] <span class="lister-item-index unbold text-primary">7.</span>
## [8] <span class="lister-item-index unbold text-primary">8.</span>
## [9] <span class="lister-item-index unbold text-primary">9.</span>
## [10] <span class="lister-item-index unbold text-primary">10.</span>
## [11] <span class="lister-item-index unbold text-primary">11.</span>
## [12] <span class="lister-item-index unbold text-primary">12.</span>
## [13] <span class="lister-item-index unbold text-primary">13.</span>
## [14] <span class="lister-item-index unbold text-primary">14.</span>
## [15] <span class="lister-item-index unbold text-primary">15.</span>
## [16] <span class="lister-item-index unbold text-primary">16.</span>
## [17] <span class="lister-item-index unbold text-primary">17.</span>
## [18] <span class="lister-item-index unbold text-primary">18.</span>
## [19] <span class="lister-item-index unbold text-primary">19.</span>
## [20] <span class="lister-item-index unbold text-primary">20.</span>
## ...
# Pull the text out of the rank nodes; each value is a number followed by a dot
rank_data <- rank_data_html %>% html_text()
print(rank_data)
## [1] "1." "2." "3." "4." "5." "6." "7." "8." "9." "10."
## [11] "11." "12." "13." "14." "15." "16." "17." "18." "19." "20."
## [21] "21." "22." "23." "24." "25." "26." "27." "28." "29." "30."
## [31] "31." "32." "33." "34." "35." "36." "37." "38." "39." "40."
## [41] "41." "42." "43." "44." "45." "46." "47." "48." "49." "50."
## [51] "51." "52." "53." "54." "55." "56." "57." "58." "59." "60."
## [61] "61." "62." "63." "64." "65." "66." "67." "68." "69." "70."
## [71] "71." "72." "73." "74." "75." "76." "77." "78." "79." "80."
## [81] "81." "82." "83." "84." "85." "86." "87." "88." "89." "90."
## [91] "91." "92." "93." "94." "95." "96." "97." "98." "99." "100."
# Parse the rank strings into numbers
rank_data <- as.numeric(rank_data)
#####################################################################
# Title: select the link inside each list item's header; its text is the film title.
title_data_html <- webpage %>% html_nodes(css = '.lister-item-header a')
# Show the matched nodes
print(title_data_html)
## {xml_nodeset (100)}
## [1] <a href="/title/tt4633694/?ref_=adv_li_tt">Spider-Man: Into the Spi ...
## [2] <a href="/title/tt1477834/?ref_=adv_li_tt">Aquaman</a>
## [3] <a href="/title/tt4154756/?ref_=adv_li_tt">Avengers: Infinity War</a>
## [4] <a href="/title/tt4532826/?ref_=adv_li_tt">Robin Hood</a>
## [5] <a href="/title/tt5463162/?ref_=adv_li_tt">Deadpool 2</a>
## [6] <a href="/title/tt4912910/?ref_=adv_li_tt">Mission: Impossible - Fa ...
## [7] <a href="/title/tt1825683/?ref_=adv_li_tt">Black Panther</a>
## [8] <a href="/title/tt1270797/?ref_=adv_li_tt">Venom</a>
## [9] <a href="/title/tt1571234/?ref_=adv_li_tt">Mortal Engines</a>
## [10] <a href="/title/tt4701182/?ref_=adv_li_tt">Bumblebee</a>
## [11] <a href="/title/tt3778644/?ref_=adv_li_tt">Solo: A Star Wars Story</a>
## [12] <a href="/title/tt6533240/?ref_=adv_li_tt">Nomis</a>
## [13] <a href="/title/tt1677720/?ref_=adv_li_tt">Ready Player One</a>
## [14] <a href="/title/tt3829266/?ref_=adv_li_tt">The Predator</a>
## [15] <a href="/title/tt2873282/?ref_=adv_li_tt">Red Sparrow</a>
## [16] <a href="/title/tt4530422/?ref_=adv_li_tt">Overlord</a>
## [17] <a href="/title/tt3606756/?ref_=adv_li_tt">Incredibles 2</a>
## [18] <a href="/title/tt4779682/?ref_=adv_li_tt">The Meg</a>
## [19] <a href="/title/tt4881806/?ref_=adv_li_tt">Jurassic World: Fallen K ...
## [20] <a href="/title/tt5164214/?ref_=adv_li_tt">Ocean's Eight</a>
## ...
# Extract the link text, i.e. the film titles
title_data <- title_data_html %>% html_text()
print(title_data)
## [1] "Spider-Man: Into the Spider-Verse"
## [2] "Aquaman"
## [3] "Avengers: Infinity War"
## [4] "Robin Hood"
## [5] "Deadpool 2"
## [6] "Mission: Impossible - Fallout"
## [7] "Black Panther"
## [8] "Venom"
## [9] "Mortal Engines"
## [10] "Bumblebee"
## [11] "Solo: A Star Wars Story"
## [12] "Nomis"
## [13] "Ready Player One"
## [14] "The Predator"
## [15] "Red Sparrow"
## [16] "Overlord"
## [17] "Incredibles 2"
## [18] "The Meg"
## [19] "Jurassic World: Fallen Kingdom"
## [20] "Ocean's Eight"
## [21] "Hunter Killer"
## [22] "The Happytime Murders"
## [23] "Upgrade"
## [24] "Mandy"
## [25] "Slaughterhouse Rulez"
## [26] "The Girl in the Spider's Web"
## [27] "Dragged Across Concrete"
## [28] "Ant-Man and the Wasp"
## [29] "Mile 22"
## [30] "Den of Thieves"
## [31] "Tomb Raider"
## [32] "Peppermint"
## [33] "The Spy Who Dumped Me"
## [34] "Death Wish"
## [35] "Sicario: Day of the Soldado"
## [36] "The Equalizer 2"
## [37] "Game Night"
## [38] "Destroyer"
## [39] "Outlaw King"
## [40] "12 Strong"
## [41] "Hotel Artemis"
## [42] "Rampage"
## [43] "Maze Runner: The Death Cure"
## [44] "The Commuter"
## [45] "Extinction"
## [46] "How It Ends"
## [47] "Skyscraper"
## [48] "Pacific Rim: Uprising"
## [49] "Assassination Nation"
## [50] "The First Purge"
## [51] "Kin"
## [52] "Anon"
## [53] "Escape Plan 2: Hades"
## [54] "Adrift"
## [55] "The Darkest Minds"
## [56] "Ying"
## [57] "Occupation"
## [58] "The Night Comes for Us"
## [59] "Kursk"
## [60] "Johnny English Strikes Again"
## [61] "Game Over, Man!"
## [62] "The Pool"
## [63] "Future World"
## [64] "Backtrace"
## [65] "Braven"
## [66] "Asher"
## [67] "Teen Titans Go! To the Movies"
## [68] "Skjelvet"
## [69] "Black '47"
## [70] "Galveston"
## [71] "Gringo"
## [72] "Doragon bôru chô: Burorî"
## [73] "Reprisal"
## [74] "The Hurricane Heist"
## [75] "Final Score"
## [76] "211"
## [77] "Entebbe"
## [78] "Hurricane"
## [79] "Speed Kills"
## [80] "The Last Man"
## [81] "A-X-L"
## [82] "Office Uprising"
## [83] "Patient Zero"
## [84] "Superfly"
## [85] "Acts of Violence"
## [86] "Da hong zha"
## [87] "Yip Man ngoi zyun: Cheung Tin Chi"
## [88] "T-34"
## [89] "Traffik"
## [90] "The Domestics"
## [91] "The Debt Collector"
## [92] "Breaking In"
## [93] "Kickboxer: Retaliation"
## [94] "Next Gen"
## [95] "K.G.F: Chapter 1"
## [96] "Accident Man"
## [97] "Dead in a Week (Or Your Money Back)"
## [98] "Lukas"
## [99] "Batman Ninja"
## [100] "Mou seung"
# Check the type of the extracted titles
class(x = title_data)
## [1] "character"
#####################################################################
# Description: select the '.text-muted' paragraph that immediately follows
# each film's ratings bar.
description_data_html <- webpage %>% html_nodes(css = '.ratings-bar+ .text-muted')
# Show the matched nodes
print(description_data_html)
## {xml_nodeset (100)}
## [1] <p class="text-muted">\n Teen Miles Morales becomes Spider-Man o ...
## [2] <p class="text-muted">\n Arthur Curry, the human-born heir to th ...
## [3] <p class="text-muted">\n The Avengers and their allies must be w ...
## [4] <p class="text-muted">\n A war-hardened Crusader and his Moorish ...
## [5] <p class="text-muted">\n Foul-mouthed mutant mercenary Wade Wils ...
## [6] <p class="text-muted">\n Ethan Hunt and his IMF team, along with ...
## [7] <p class="text-muted">\n T'Challa, heir to the hidden but advanc ...
## [8] <p class="text-muted">\n A failed reporter is bonded to an alien ...
## [9] <p class="text-muted">\n In a post-apocalyptic world where citie ...
## [10] <p class="text-muted">\n On the run in the year 1987, Bumblebee ...
## [11] <p class="text-muted">\n During an adventure into the criminal u ...
## [12] <p class="text-muted">\n A weathered Lieutenant, his police forc ...
## [13] <p class="text-muted">\n When the creator of a virtual reality c ...
## [14] <p class="text-muted">\n When a young boy accidentally triggers ...
## [15] <p class="text-muted">\n Ballerina Dominika Egorova is recruited ...
## [16] <p class="text-muted">\n A small group of American soldiers find ...
## [17] <p class="text-muted">\n The Incredibles hero family takes on a ...
## [18] <p class="text-muted">\n A group of scientists exploring the Mar ...
## [19] <p class="text-muted">\n When the island's dormant volcano begin ...
## [20] <p class="text-muted">\n Debbie Ocean gathers an all-female crew ...
## ...
# Extract the description text (still carrying the page's leading newline and indentation)
description_data <- description_data_html %>% html_text()
print(description_data)
## [1] "\n Teen Miles Morales becomes Spider-Man of his reality, crossing his path with five counterparts from other dimensions to stop a threat for all realities."
## [2] "\n Arthur Curry, the human-born heir to the underwater kingdom of Atlantis, goes on a quest to prevent a war between the worlds of ocean and land."
## [3] "\n The Avengers and their allies must be willing to sacrifice all in an attempt to defeat the powerful Thanos before his blitz of devastation and ruin puts an end to the universe."
## [4] "\n A war-hardened Crusader and his Moorish commander mount an audacious revolt against the corrupt English crown."
## [5] "\n Foul-mouthed mutant mercenary Wade Wilson (a.k.a. Deadpool), brings together a team of fellow mutant rogues to protect a young boy with supernatural abilities from the brutal, time-traveling cyborg Cable."
## [6] "\n Ethan Hunt and his IMF team, along with some familiar allies, race against time after a mission gone wrong."
## [7] "\n T'Challa, heir to the hidden but advanced kingdom of Wakanda, must step forward to lead his people into a new future and must confront a challenger from his country's past."
## [8] "\n A failed reporter is bonded to an alien entity, one of many symbiotes who have invaded Earth. But the being takes a liking to Earth and decides to protect it."
## [9] "\n In a post-apocalyptic world where cities ride on wheels and consume each other to survive, two people meet in London and try to stop a conspiracy."
## [10] "\n On the run in the year 1987, Bumblebee finds refuge in a junkyard in a small California beach town. On the cusp of turning 18 and trying to find her place in the world, Charlie Watson discovers Bumblebee, battle-scarred and broken."
## [11] "\n During an adventure into the criminal underworld, Han Solo meets his future co-pilot Chewbacca and encounters Lando Calrissian years before joining the Rebellion."
## [12] "\n A weathered Lieutenant, his police force, and a local vigilante are all caught up in a dangerous scheme involving a recently arrested, troubled man who's linked to years of female abductions and murders."
## [13] "\n When the creator of a virtual reality called the OASIS dies, he makes a posthumous challenge to all OASIS users to find his Easter Egg, which will give the finder his fortune and control of his world."
## [14] "\n When a young boy accidentally triggers the universe's most lethal hunters' return to Earth, only a ragtag crew of ex-soldiers and a disgruntled scientist can prevent the end of the human race."
## [15] "\n Ballerina Dominika Egorova is recruited to 'Sparrow School,' a Russian intelligence service where she is forced to use her body as a weapon. Her first mission, targeting a C.I.A. agent, threatens to unravel the security of both nations."
## [16] "\n A small group of American soldiers find horror behind enemy lines on the eve of D-Day."
## [17] "\n The Incredibles hero family takes on a new mission, which involves a change in family roles: Bob Parr (Mr Incredible) must manage the house while his wife Helen (Elastigirl) goes out to save the world."
## [18] "\n A group of scientists exploring the Marianas Trench encounter the largest marine predator that has ever existed - the Megalodon."
## [19] "\n When the island's dormant volcano begins roaring to life, Owen and Claire mount a campaign to rescue the remaining dinosaurs from this extinction-level event."
## [20] "\n Debbie Ocean gathers an all-female crew to attempt an impossible heist at New York City's yearly Met Gala."
## [21] "\n An untested American submarine captain teams with U.S. Navy Seals to rescue the Russian president, who has been kidnapped by a rogue general."
## [22] "\n When the puppet cast of a '90s children's TV show begin to get murdered one by one, a disgraced LAPD detective-turned-private eye puppet takes on the case."
## [23] "\n Set in the near-future, technology controls nearly all aspects of life. But when Grey, a self-identified technophobe, has his world turned upside down, his only hope for revenge is an experimental computer chip implant called Stem."
## [24] "\n The enchanted lives of a couple in a secluded forest are brutally shattered by a nightmarish hippie cult and their demon-biker henchmen, propelling a man into a spiraling, surreal rampage of vengeance."
## [25] "\n An illustrious British boarding school becomes a bloody battleground when a mysterious sinkhole appears at a nearby fracking site unleashing unspeakable horror."
## [26] "\n Young computer hacker Lisbeth Salander and journalist Mikael Blomkvist find themselves caught in a web of spies, cybercriminals and corrupt government officials."
## [27] "\n Once two overzealous cops get suspended from the force, they must delve into the criminal underworld to get their proper compensation."
## [28] "\n As Scott Lang balances being both a Super Hero and a father, Hope van Dyne and Dr. Hank Pym present an urgent new mission that finds the Ant-Man fighting alongside The Wasp to uncover secrets from their past."
## [29] "\n A small team of elite American intelligence officers, part of a top-secret tactical command unit, try to smuggle a mysterious police officer with sensitive information out of Indonesia."
## [30] "\n A gritty crime saga which follows the lives of an elite unit of the LA County Sheriff's Dept. and the state's most successful bank robbery crew as the outlaws plan a seemingly impossible heist on the Federal Reserve Bank."
## [31] "\n Lara Croft, the fiercely independent daughter of a missing adventurer, must push herself beyond her limits when she discovers the island where her father disappeared."
## [32] "\n Five years after her husband and daughter are killed in a senseless act of violence, a woman comes back from self-imposed exile to seek revenge against those responsible and the system that let them go free."
## [33] "\n Audrey and Morgan are best friends who unwittingly become entangled in an international conspiracy when one of the women discovers the boyfriend who dumped her was actually a spy."
## [34] "\n Dr. Paul Kersey is an experienced trauma surgeon, a man who has spent his life saving lives. After an attack on his family, Paul embarks on his own mission for justice."
## [35] "\n The drug war on the U.S.-Mexico border has escalated as the cartels have begun trafficking terrorists across the US border. To fight the war, federal agent Matt Graver re-teams with the mercurial Alejandro."
## [36] "\n Robert McCall serves an unflinching justice for the exploited and oppressed, but how far will he go when that is someone he loves?"
## [37] "\n A group of friends who meet regularly for game nights find themselves entangled in a real-life mystery when the shady brother of one of them is seemingly kidnapped by dangerous gangsters."
## [38] "\n A police detective reconnects with people from an undercover assignment in her distant past in order to make peace."
## [39] "\n A true David vs. Goliath story of how the 14th century Scottish 'Outlaw King' Robert the Bruce used cunning and bravery to defeat the much larger and better equipped occupying English army."
## [40] "\n 12 Strong tells the story of the first Special Forces team deployed to Afghanistan after 9/11; under the leadership of a new captain, the team must work with an Afghan warlord to take down the Taliban."
## [41] "\n Set in riot-torn, near-future Los Angeles, 'Hotel Artemis' follows the Nurse, who runs a secret, members-only emergency room for criminals."
## [42] "\n When three different animals become infected with a dangerous pathogen, a primatologist and a geneticist team up to stop them from destroying Chicago."
## [43] "\n Young hero Thomas embarks on a mission to find a cure for a deadly disease known as \"The Flare\"."
## [44] "\n An Insurance Salesman/Ex-Cop is caught up in a criminal conspiracy during his daily commute home."
## [45] "\n A father has a recurring dream of losing his family. His nightmare turns into reality when the planet is invaded by a force bent on destruction. Fighting for their lives, he comes to realize an unknown strength to keep them safe from harm."
## [46] "\n A desperate man tries to return home to his pregnant fiancée after a mysterious apocalyptic event turns everything to chaos."
## [47] "\n A security expert must infiltrate a burning skyscraper, 225 stories above ground, when his family is trapped inside by criminals."
## [48] "\n Jake Pentecost, son of Stacker Pentecost, reunites with Mako Mori to lead a new generation of Jaeger pilots, including rival Lambert and 15-year-old hacker Amara, against a new Kaiju threat."
## [49] "\n After a malicious data hack exposes the secrets of the perpetually American town of Salem, chaos descents and four girls must fight to survive, while coping with the hack themselves."
## [50] "\n America's third political party, the New Founding Fathers of America, comes to power and conducts an experiment: no laws for 12 hours on Staten Island. No one has to stay on the island, but $5,000 is given to anyone who does."
## [51] "\n Chased by a vengeful criminal, the feds and a gang of otherworldly soldiers, a recently released ex-con, and his adopted teenage brother are forced to go on the run with a weapon of mysterious origin as their only protection."
## [52] "\n In a world without anonymity or crime, a detective meets a woman who threatens their security."
## [53] "\n Years after he fought his way out of an inescapable prison, Ray Breslin has organized a new top-notch security force. But when one of his team members goes missing, Breslin must return to the hell he once escaped from."
## [54] "\n A true story of survival, as a young couple's chance encounter leads them first to love, and then on the adventure of a lifetime as they face one of the most catastrophic hurricanes in recorded history."
## [55] "\n Imprisoned by an adult world that now fears everyone under 18, a group of teens form a resistance group to fight back and reclaim control of their future."
## [56] "\n Life and intrigue in an ancient Chinese court."
## [57] "\n A small group of town residents have to band together after a devastating ground invasion. As they struggle to survive, they realize they must stay one step ahead of their attackers, and work together for a chance to strike back."
## [58] "\n Ito (Joe Taslim), a gangland enforcer, caught amidst a treacherous and violent insurrection within his Triad crime family upon his return home from a stint abroad."
## [59] "\n The film follows the 2000 K-141 Kursk submarine disaster and the governmental negligence that followed. As the sailors fight for survival, their families desperately battle political obstacles and impossible odds to save them."
## [60] "\n After a cyber-attack reveals the identity of all of the active undercover agents in Britain, Johnny English is forced to come out of retirement to find the mastermind hacker."
## [61] "\n Three friends are on the verge of getting their video game financed when their benefactor is taken hostage by terrorists."
## [62] "\n In an abandoned 6-meter deep pool, a couple is stranded there with a deadly predator."
## [63] "\n A young boy searches a future world wasteland for a rumored cure for his dying mother."
## [64] "\n The lone surviving thief of a violent armored car robbery is sprung from a high security facility and administered an experimental drug."
## [65] "\n A logger defends his family from a group of dangerous drug runners."
## [66] "\n An aging hitman's last job goes sideways, forcing him to redeem himself."
## [67] "\n A villain's maniacal plan for world domination sidetracks five teenage superheroes who dream of Hollywood stardom."
## [68] "\n In 1904 an earthquake of magnitude 5.4 on the Richter scale shook Oslo, with an epicenter in the \"Oslo Graben\" which runs under the Norwegian capital. There are now signs that indicate that we can expect a major future earthquake in Oslo."
## [69] "\n Set in Ireland during the Great Famine, the drama follows an Irish Ranger who has been fighting for the British Army abroad, as he abandons his post to reunite with his family."
## [70] "\n After escaping a set up, a dying hitman returns to his hometown of Galveston where he plans his revenge."
## [71] "\n GRINGO, a dark comedy mixed with white-knuckle action and dramatic intrigue, explores the battle of survival for businessman Harold Soyinka (David Oyelowo) when he finds himself crossing the line from law-abiding citizen to wanted criminal."
## [72] "\n Goku and Vegeta encounter Broly, a Saiyan warrior unlike any fighter they've faced before."
## [73] "\n A bank manager haunted by a violent heist that took the life of a coworker teams up with his ex-cop neighbor to bring down the assailant, initiating an explosive counterattack that brings all three men to the breaking point."
## [74] "\n Thieves attempt a massive heist against the U.S. Treasury as a Category 5 hurricane approaches one of its Mint facilities."
## [75] "\n After deadly terrorists abduct his niece at a soccer match, an ex-soldier with lethal fighting skills wages a one-man war to save her and prevent mass destruction."
## [76] "\n While on a routine patrol, an aging cop, his partner and their ride-along get caught in a standoff with a band of former mercenaries robbing a bank."
## [77] "\n Inspired by the true events of the 1976 hijacking of an Air France flight en route from Tel Aviv to Paris, and the most daring rescue mission ever attempted."
## [78] "\n The exploits of 303 Squadron RAF during the Battle of Britain. The squadron consisted of Polish pilots, many of whom were veterans of the air battles involved in Germany's invasion of Poland."
## [79] "\n Speedboat racing champion and multimillionaire, Ben Aronoff, leads a double life that lands him in trouble with the law and drug lords."
## [80] "\n Kurt, combat veteran with PTSD and hallucinations, fortifies his home and builds a secret underground shelter due to doomsday like weather changes. He gets a security job to pay for it and his boss' cute daughter for company."
## [81] "\n A.X.L. is a top-secret, robotic dog who develops a special friendship with Miles and will go to any length to protect his new companion."
## [82] "\n An employee at a weapons factory discovers that an energy drink turns his co-workers into zombies"
## [83] "\n After an unprecedented global pandemic turns the majority of humankind into violent \"Infected,\" a man gifted with the ability to speak the Infected's new language leads the last survivors on a hunt for Patient Zero and a cure."
## [84] "\n With retirement on his mind, a successful young drug dealer sets up one last big job, while dealing with trigger-happy colleagues and the police."
## [85] "\n At a bachelorette party in a nightclub, the bride tells 2 guys offering blow to go away. They abduct her. The groom's 2 big brothers looked after their kid brother as kids and do so now again as veterans, \"looking\" for her and the 2 guys."
## [86] "\n During World War II, five different Chinese people fight their way through Japanese Air Force attacks to protect an important military machine in Chongqing, 1940."
## [87] "\n While keeping a low profile after his defeat by Ip Man, Cheung Tin Chi gets into trouble after getting in a fight with a powerful foreigner."
## [88] "\n In 1944, a courageous group of Russian soldiers managed to escape from German captivity in a half-destroyed legendary T-34 tank. Those were the times of unforgettable bravery, fierce fighting, unbreakable love, and legendary miracles."
## [89] "\n A couple off for a romantic weekend in the mountains are accosted by a biker gang. Alone in the mountains, Brea and John must defend themselves against the gang, who will stop at nothing to protect their secrets."
## [90] "\n In the weeks following an apocalyptic event, a husband and wife venture across the countryside inhabited by deadly factions in search of safety, and must work together as they are pushed to the breaking point in order to survive."
## [91] "\n A classically-trained martial artist goes to work as a mob debt collector. The job seems easy enough, until a client drags him into a situation deeper than could ever be anticipated."
## [92] "\n A woman fights to protect her family during a home invasion."
## [93] "\n Right after winning a fight in Las Vegas, Kurt Sloane is sedated and taken to a prison in Bangkok, where he's forced to fight a 6'10\" giant for freedom and $1M - but this will require some intense training."
## [94] "\n A friendship with a top-secret robot turns a lonely girl's life into a thrilling adventure as they take on bullies, evil bots and a scheming madman."
## [95] "\n Set in the 1970s, it's the story of a fierce rebel who rises against the brutal oppression and becomes the symbol of hope to legions of downtrodden people."
## [96] "\n Mike is a tough hitman, who makes his hits look like accidents or suicide. He's in a gang of hitmen, each with his own style. When his loved ex is killed, Mike looks for those responsible."
## [97] "\n After his ninth unsuccessful attempt on his own life, a young man outsources his suicide to an ageing assassin. \"If you're serious about ending it, you need professional help\""
## [98] "\n A nightclub bouncer in his fifties who's taken punches, literally and figuratively, struggles to raise his 8-year-old daughter."
## [99] "\n Batman, along with a number of his allies and adversaries, finds himself transplanted from modern Gotham City to feudal Japan."
## [100] "\n The Hong Kong police are hunting a counterfeiting gang led by a mastermind code-named \"Painter\". In order to crack the true identity of him, the police recruits gang member Lee Man to unmask \"Painter's\" secret identity."
# Data-Preprocessing: removing '\n' and the surrounding whitespace.
# Each raw description starts with a newline followed by indentation; removing
# only the newline would leave that indentation at the front of every string.
description_data <- gsub(pattern = "\n", replacement = "", x = description_data)
# Strip whatever leading/trailing whitespace remains once the newlines are gone
description_data <- gsub(pattern = "^\\s+|\\s+$", replacement = "", x = description_data)
head(description_data)
## [1] "Teen Miles Morales becomes Spider-Man of his reality, crossing his path with five counterparts from other dimensions to stop a threat for all realities."
## [2] "Arthur Curry, the human-born heir to the underwater kingdom of Atlantis, goes on a quest to prevent a war between the worlds of ocean and land."
## [3] "The Avengers and their allies must be willing to sacrifice all in an attempt to defeat the powerful Thanos before his blitz of devastation and ruin puts an end to the universe."
## [4] "A war-hardened Crusader and his Moorish commander mount an audacious revolt against the corrupt English crown."
## [5] "Foul-mouthed mutant mercenary Wade Wilson (a.k.a. Deadpool), brings together a team of fellow mutant rogues to protect a young boy with supernatural abilities from the brutal, time-traveling cyborg Cable."
## [6] "Ethan Hunt and his IMF team, along with some familiar allies, race against time after a mission gone wrong."
#####################################################################
# Runtime: select every '.runtime' node on the page
runtime_data_html <- webpage %>% html_nodes('.runtime')
# Extract the text of the runtime nodes
runtime_data <- runtime_data_html %>% html_text()
# Peek at the raw values
print(head(runtime_data))
## [1] "117 min" "143 min" "149 min" "116 min" "119 min" "147 min"
# Drop the "min" unit so only the number of minutes (as text) remains
runtime_data <- gsub("min", "", runtime_data)
print(head(runtime_data))
## [1] "117 " "143 " "149 " "116 " "119 " "147 "
# Parse the remaining text into numbers
runtime_data <- as.numeric(runtime_data)
####################################################################
# Genre: select the node holding each film's genre list
genre_data_html <- html_nodes(webpage, '.genre')
# Converting the genre data to text and trimming surrounding whitespace.
# html_text() takes no explicit argument here: the pipe already supplies the
# node set as its first argument, and naming it again would push the node set
# into the `trim` parameter.
genre_data <- genre_data_html %>%
  html_text() %>%
  str_trim()
# Let's have a look at the genre
head(genre_data)
## [1] "Animation, Action, Adventure" "Action, Adventure, Fantasy"
## [3] "Action, Adventure, Sci-Fi" "Action, Adventure, Thriller"
## [5] "Action, Adventure, Comedy" "Action, Adventure, Thriller"
# Data-Preprocessing: removing any remaining '\n'
genre_data <- gsub("\n", "", genre_data)
# Data-Preprocessing: removing all spaces, so "Action, Adventure" becomes "Action,Adventure"
genre_data <- gsub(" ", "", genre_data)
# Converting each genre combination from text to factor
genre_data <- as.factor(genre_data)
###################################################################
# Rating: select the <strong> element inside each '.ratings-imdb-rating' block
rating_data_html <- webpage %>% html_nodes('.ratings-imdb-rating strong')
# Extract the rating text
rating_data <- rating_data_html %>% html_text()
# Peek at the raw (character) ratings
print(head(rating_data))
## [1] "8.4" "7.0" "8.5" "5.3" "7.7" "7.8"
# Parse the ratings into numbers and check the result
rating_data <- as.numeric(x = rating_data)
print(head(rating_data))
## [1] 8.4 7.0 8.5 5.3 7.7 7.8
##################################################################
# Votes: select the second child <span> of each '.sort-num_votes-visible' block
votes_data_html <- webpage %>% html_nodes('.sort-num_votes-visible span:nth-child(2)')
# Extract the vote counts as text
votes_data <- votes_data_html %>% html_text()
# Peek at the raw values (they contain thousands separators)
print(head(votes_data))
## [1] "264,858" "301,589" "713,910" "50,228" "417,951" "248,812"
# Remove the commas so the strings can be parsed as numbers
votes_data <- gsub(",", "", votes_data)
print(head(votes_data))
## [1] "264858" "301589" "713910" "50228" "417951" "248812"
# Parse the vote counts into numbers
votes_data <- as.numeric(votes_data)
##################################################################
# Combine the scraped fields into one data frame, one row per film.
# Every field was scraped independently, so first make sure they all have the
# same number of entries: data.frame() silently recycles a vector whose length
# divides the row count, which would misalign films and their attributes.
scraped_lengths <- c(
  Rank = length(rank_data), Title = length(title_data),
  Description = length(description_data), Runtime = length(runtime_data),
  Genre = length(genre_data), Rating = length(rating_data),
  Votes = length(votes_data)
)
if (length(unique(scraped_lengths)) != 1L) {
  stop(
    "Scraped fields have different lengths: ",
    paste0(names(scraped_lengths), "=", scraped_lengths, collapse = ", "),
    call. = FALSE
  )
}
# stringsAsFactors = FALSE keeps Title and Description as character columns;
# under R < 4.0 the default would turn these free-text fields into factors with
# one level per film. Genre stays a factor because it was converted explicitly.
movies_df <- data.frame(
  Rank = rank_data, Title = title_data,
  Description = description_data, Runtime = runtime_data,
  Genre = genre_data, Rating = rating_data,
  Votes = votes_data,
  stringsAsFactors = FALSE
)
# Structure of the data frame.
# Note: the output recorded below was captured under R 3.5 without
# stringsAsFactors = FALSE, so it lists Title and Description as factors;
# they are now reported as chr.
str(movies_df)
## 'data.frame': 100 obs. of 7 variables:
## $ Rank : num 1 2 3 4 5 6 7 8 9 10 ...
## $ Title : Factor w/ 100 levels "12 Strong","211",..: 76 9 12 69 22 52 16 98 53 19 ...
## $ Description: Factor w/ 100 levels " 12 Strong tells the story of the first Special Forces team deployed to Afghanistan after 9/11; under the le"| __truncated__,..: 79 39 80 22 52 50 78 6 58 69 ...
## $ Runtime : num 117 143 149 116 119 147 134 112 128 114 ...
## $ Genre : Factor w/ 33 levels "Action,Adventure,Biography",..: 32 6 8 9 2 9 8 28 6 8 ...
## $ Rating : num 8.4 7 8.5 5.3 7.7 7.8 7.3 6.7 6.1 6.8 ...
## $ Votes : num 264858 301589 713910 50228 417951 ...
# Analyzing scraped data from the web

# Histogram of runtimes (30 bins), bars filled by genre combination.
# Written with ggplot() + geom_histogram() rather than qplot(), which is
# deprecated in current ggplot2 releases; this also matches the style of the
# scatter plot below.
ggplot(movies_df, aes(x = Runtime, fill = Genre)) +
  geom_histogram(bins = 30)

# Scatter plot of runtime against rating; point size encodes the number of
# votes and point colour encodes the genre combination.
ggplot(movies_df, aes(x = Runtime, y = Rating)) +
  geom_point(aes(size = Votes, colour = Genre))
