library("rvest")
## Loading required package: xml2
library("XML")
##
## Attaching package: 'XML'
## The following object is masked from 'package:rvest':
##
## xml
# IMDB Top 250 Movies
# Fetch the chart page and select the anchor element of every listed title.
url <- "http://www.imdb.com/chart/top?ref_=nv_wl_img_3"
page <- read_html(url)
movie.nodes <- html_nodes(page, ".titleColumn a")
# To inspect a single node interactively:
# xmlTreeParse(movie.nodes[[1]])

# Every anchor carries two attributes of interest: the relative link (href)
# and a summary string (title). The anchor text is the movie name.
movie.link <- sapply(html_attrs(movie.nodes), function(attrs) attrs[["href"]])
movie.link <- paste0("http://www.imdb.com", movie.link)
movie.cast <- sapply(html_attrs(movie.nodes), function(attrs) attrs[["title"]])
movie.name <- html_text(movie.nodes)

# The year is rendered wrapped in parentheses; strip both brackets.
year <- html_text(html_nodes(page, ".secondaryInfo"))
year <- gsub("[()]", "", year)

top250 <- data.frame(movie.name, year, movie.cast, movie.link)
head(top250)
## movie.name year
## 1 The Shawshank Redemption 1994
## 2 The Godfather 1972
## 3 The Godfather: Part II 1974
## 4 The Dark Knight 2008
## 5 12 Angry Men 1957
## 6 Schindler's List 1993
## movie.cast
## 1 Frank Darabont (dir.), Tim Robbins, Morgan Freeman
## 2 Francis Ford Coppola (dir.), Marlon Brando, Al Pacino
## 3 Francis Ford Coppola (dir.), Al Pacino, Robert De Niro
## 4 Christopher Nolan (dir.), Christian Bale, Heath Ledger
## 5 Sidney Lumet (dir.), Henry Fonda, Lee J. Cobb
## 6 Steven Spielberg (dir.), Liam Neeson, Ralph Fiennes
## movie.link
## 1 http://www.imdb.com/title/tt0111161/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_1
## 2 http://www.imdb.com/title/tt0068646/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_2
## 3 http://www.imdb.com/title/tt0071562/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_3
## 4 http://www.imdb.com/title/tt0468569/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_4
## 5 http://www.imdb.com/title/tt0050083/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_5
## 6 http://www.imdb.com/title/tt0108052/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_6
# One-column table holding the detail-page link of every chart entry.
mvurl <- data.frame(top250$movie.link)

# Empty accumulators, one per scraped field; the scraping loop stores the
# values of movie i at position i of each list.
movie.director <- movie.tagline <- movie.star <- list()
movie.genre <- movie.storyline <- list()
movie.budget <- movie.gross <- list()

# Number of detail pages to visit and the loop counter's starting value.
x <- nrow(mvurl)
i <- 1
# Read an HTML page, retrying after a pause whenever the download fails.
#
# u          URL (or anything else read_html() accepts) to read.
# wait       Seconds to sleep between a failed attempt and the next one.
# max_tries  Maximum number of attempts. The default, Inf, keeps retrying
#            until the read succeeds; with a finite value the last error is
#            re-raised once the attempts are used up.
#
# Returns whatever read_html(u) returns on the first successful attempt.
#
# The retry always targets the argument `u` (not a global variable), and it is
# a loop rather than a recursive call so that a long outage cannot exhaust the
# call stack.
readpage <- function(u, wait = 5, max_tries = Inf) {
  attempt <- 0
  repeat {
    attempt <- attempt + 1
    result <- tryCatch(read_html(u), error = function(e) e)
    if (!inherits(result, "error")) {
      return(result)
    }
    if (attempt >= max_tries) {
      stop(
        "Failed to read ", u, " after ", attempt, " attempt(s): ",
        conditionMessage(result),
        call. = FALSE
      )
    }
    message("Reading ", u, " failed (", conditionMessage(result), "); retrying")
    Sys.sleep(wait)
  }
}
# Extract the first "<label>: <currency> <amount>" figure from each element of
# `text` and return the distinct values with the "<label>: " prefix removed.
#
# text   Character vector of whitespace-normalised page text.
# label  Field name to look for, e.g. "Budget" or "Gross".
#
# Elements without a match are dropped (regmatches() skips them) instead of
# being turned into "". Previously those empty strings survived unique() and
# left a dangling ", " behind the amount once the values were collapsed.
extract.amount <- function(text, label) {
  pattern <- paste0(label, ":( +)([A-Z$? ]+)([0-9,]+)")
  found <- regmatches(text, regexpr(pattern, text))
  unique(sub(paste0(label, ": "), "", found, fixed = TRUE))
}

# Visit the detail page of every movie and store the scraped fields at
# position i of the corresponding accumulator list.
for (i in seq_len(x)) {
  # as.character() covers the case where the link column is a factor
  # (data.frame() converts strings to factors by default before R 4.0).
  url <- as.character(mvurl[i, 1])
  page <- readpage(url)

  movie.director[[i]] <- html_text(
    html_nodes(page, '.summary_text+ .credit_summary_item .itemprop')
  )
  movie.tagline[[i]] <- html_text(
    html_nodes(page, '#titleStoryLine .txt-block:nth-child(8)')
  )
  movie.star[[i]] <- html_text(
    html_nodes(page, '.credit_summary_item~ .credit_summary_item+ .credit_summary_item')
  )
  movie.genre[[i]] <- html_text(html_nodes(page, '.see-more.canwrap~ .canwrap a'))
  movie.storyline[[i]] <- html_text(html_nodes(page, '#titleStoryLine p'))

  ### Scrape Box Office budget and gross begin ###
  # Text of the details section, with runs of whitespace squeezed to one space
  # so the patterns in extract.amount() can rely on single separators.
  details <- html_text(
    html_nodes(page, '#titleDetails .txt-block:nth-child(13) , #titleDetails')
  )
  details <- gsub("\\s+", " ", details)
  movie.budget[[i]] <- extract.amount(details, "Budget")
  movie.gross[[i]] <- extract.amount(details, "Gross")
  ### Scrape Box Office budget and gross end ###

  # Progress indicator; a bare expression inside a loop is never printed.
  message("Scraped movie ", i, " of ", x)
}
# Flatten every scraped list into one string per movie (values joined by
# ", ") and attach the results to top250 as new columns, in this order.
top250[c(
  "movie.director", "movie.tagline", "movie.star", "movie.genre",
  "movie.storyline", "movie.budget", "movie.gross"
)] <- lapply(
  list(
    movie.director, movie.tagline, movie.star, movie.genre,
    movie.storyline, movie.budget, movie.gross
  ),
  function(parts) vapply(parts, paste, character(1L), collapse = ", ")
)
# A movie for which nothing was scraped (character(0)) collapses to "".
# Replace those empty cells with the literal string "NA", column by column.
# `y` records the distinct values of the column before it is patched.

# Tagline
y <- unique(top250$movie.tagline)
top250$movie.tagline[top250$movie.tagline %in% ""] <- "NA"

# Genre
y <- unique(top250$movie.genre)
top250$movie.genre[top250$movie.genre %in% ""] <- "NA"

# Budget
y <- unique(top250$movie.budget)
top250$movie.budget[top250$movie.budget %in% ""] <- "NA"

# Gross
y <- unique(top250$movie.gross)
top250$movie.gross[top250$movie.gross %in% ""] <- "NA"
# Text cleaning. Each field first has every run of whitespace squeezed to a
# single space (or removed entirely, for genre); field-specific labels and
# trailing link text are then stripped.

# Tagline: drop the "Taglines: " label and everything from "See more " on.
top250$movie.tagline <- gsub("\\s+", " ", top250$movie.tagline)
top250$movie.tagline <- gsub("Taglines: |See more (.*)+", "", top250$movie.tagline)

# Stars: drop the "Stars: " label and the "| See full cast & crew" tail.
top250$movie.star <- gsub("\\s+", " ", top250$movie.star)
top250$movie.star <- gsub(
  "Stars: |(\\| See full cast & crew)+ (.*)+", "",
  top250$movie.star
)

# Storyline: whitespace normalisation only.
top250$movie.storyline <- gsub("\\s+", " ", top250$movie.storyline)

# Genre: remove all whitespace, leaving a compact comma-separated list.
top250$movie.genre <- gsub("\\s+", "", top250$movie.genre)
# Final table: the scraped columns in presentation order, under their
# report-friendly names.
imdbtop250 <- data.frame(
  MovieName = top250$movie.name,
  ReleaseYear = top250$year,
  Movie.Director = top250$movie.director,
  Movie.Star = top250$movie.star,
  Movie.Genre = top250$movie.genre,
  Movie.Tagline = top250$movie.tagline,
  Movie.Storyline = top250$movie.storyline,
  Movie.Budget = top250$movie.budget,
  Movie.Gross = top250$movie.gross,
  Movie.Link = top250$movie.link
)
head(imdbtop250)
## MovieName ReleaseYear Movie.Director
## 1 The Shawshank Redemption 1994 Frank Darabont
## 2 The Godfather 1972 Francis Ford Coppola
## 3 The Godfather: Part II 1974 Francis Ford Coppola
## 4 The Dark Knight 2008 Christopher Nolan
## 5 12 Angry Men 1957 Sidney Lumet
## 6 Schindler's List 1993 Steven Spielberg
## Movie.Star
## 1 Tim Robbins, Morgan Freeman, Bob Gunton
## 2 Marlon Brando, Al Pacino, James Caan
## 3 Al Pacino, Robert De Niro, Robert Duvall
## 4 Christian Bale, Heath Ledger, Aaron Eckhart
## 5 Henry Fonda, Lee J. Cobb, Martin Balsam
## 6 Liam Neeson, Ralph Fiennes, Ben Kingsley
## Movie.Genre
## 1 Crime,Drama
## 2 Crime,Drama
## 3 Crime,Drama
## 4 Action,Crime,Drama,Thriller
## 5 Crime,Drama
## 6 Biography,Drama,History
## Movie.Tagline
## 1 Fear can hold you prisoner. Hope can set you free.
## 2 The Godfather is now a movie.
## 3 NA
## 4 Welcome to a world without rules.
## 5 ...it explodes like twelve sticks of dynamite!
## 6 Whoever saves one life, saves the world entire.
## Movie.Storyline
## 1 Chronicles the experiences of a formerly successful banker as a prisoner in the gloomy jailhouse of Shawshank after being found guilty of a crime he did not commit. The film portrays the man's unique way of dealing with his new, torturous life; along the way he befriends a number of fellow prisoners, most notably a wise long-term inmate named Red. Written by J-S-Golden
## 2 When the aging head of a famous crime family decides to transfer his position to one of his subalterns, a series of unfortunate events start happening to the family, and a war begins between all the well-known families leading to insolence, deportation, murder and revenge, and ends with the favorable successor being finally chosen. Written by J. S. Golden
## 3 The continuing saga of the Corleone crime family tells the story of a young Vito Corleone growing up in Sicily and in 1910s New York; and follows Michael Corleone in the 1950s as he attempts to expand the family business into Las Vegas, Hollywood and Cuba. Written by Keith Loh <loh@sfu.ca>
## 4 Set within a year after the events of Batman Begins, Batman, Lieutenant James Gordon, and new district attorney Harvey Dent successfully begin to round up the criminals that plague Gotham City until a mysterious and sadistic criminal mastermind known only as the Joker appears in Gotham, creating a new wave of chaos. Batman's struggle against the Joker becomes deeply personal, forcing him to "confront everything he believes" and improve his technology to stop him. A love triangle develops between Bruce Wayne, Dent and Rachel Dawes. Written by Leon Lombardi
## 5 The defense and the prosecution have rested and the jury is filing into the jury room to decide if a young man is guilty or innocent of murdering his father. What begins as an open-and-shut case of murder soon becomes a detective story that presents a succession of clues creating doubt, and a mini-drama of each of the jurors' prejudices and preconceptions about the trial, the accused, and each other. Based on the play, all of the action takes place on the stage of the jury room. Written by pjk <PETESID@VNET.IBM.COM>
## 6 Oskar Schindler is a vainglorious and greedy German businessman who becomes an unlikely humanitarian amid the barbaric German Nazi reign when he feels compelled to turn his factory into a refuge for Jews. Based on the true story of Oskar Schindler who managed to save about 1100 Jews from being gassed at the Auschwitz concentration camp, it is a testament to the good in all of us. Written by Harald Mayr <marvin@bike.augusta.de>
## Movie.Budget Movie.Gross
## 1 $25,000,000 $28,341,469
## 2 $6,000,000, $134,821,952
## 3 $13,000,000 $57,300,000
## 4 $185,000,000, $533,316,061
## 5 $350,000 NA
## 6 $22,000,000, $96,067,179
## Movie.Link
## 1 http://www.imdb.com/title/tt0111161/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_1
## 2 http://www.imdb.com/title/tt0068646/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_2
## 3 http://www.imdb.com/title/tt0071562/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_3
## 4 http://www.imdb.com/title/tt0468569/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_4
## 5 http://www.imdb.com/title/tt0050083/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_5
## 6 http://www.imdb.com/title/tt0108052/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_6
# Movies released from 1996 to 1998 (inclusive), saved to CSV and printed.
#
# The release year is taken from top250 itself rather than from a separate
# global vector, and converted to an integer so the range test is numeric
# instead of a string comparison. as.character() comes first because the
# column may be a factor (data.frame() default before R 4.0). which() drops
# rows whose year could not be parsed instead of yielding all-NA rows.
release.year <- as.integer(as.character(top250$year))
movies_1996_1998 <- top250[which(release.year >= 1996L & release.year <= 1998L), ]
write.csv(movies_1996_1998, "Movies_1996_1998.csv", row.names = FALSE)
movies_1996_1998
## movie.name year
## 26 La vita è bella 1997
## 29 Saving Private Ryan 1998
## 31 American History X 1998
## 65 Mononoke-hime 1997
## 97 L.A. Confidential 1997
## 107 Good Will Hunting 1997
## 118 Bacheha-Ye aseman 1997
## 136 Lock, Stock and Two Smoking Barrels 1998
## 151 The Big Lebowski 1998
## 155 Trainspotting 1996
## 157 Fargo 1996
## 206 The Truman Show 1998
## movie.cast
## 26 Roberto Benigni (dir.), Roberto Benigni, Nicoletta Braschi
## 29 Steven Spielberg (dir.), Tom Hanks, Matt Damon
## 31 Tony Kaye (dir.), Edward Norton, Edward Furlong
## 65 Hayao Miyazaki (dir.), Yôji Matsuda, Yuriko Ishida
## 97 Curtis Hanson (dir.), Kevin Spacey, Russell Crowe
## 107 Gus Van Sant (dir.), Robin Williams, Matt Damon
## 118 Majid Majidi (dir.), Mohammad Amir Naji, Amir Farrokh Hashemian
## 136 Guy Ritchie (dir.), Jason Flemyng, Dexter Fletcher
## 151 Joel Coen (dir.), Jeff Bridges, John Goodman
## 155 Danny Boyle (dir.), Ewan McGregor, Ewen Bremner
## 157 Joel Coen (dir.), William H. Macy, Frances McDormand
## 206 Peter Weir (dir.), Jim Carrey, Ed Harris
## movie.link
## 26 http://www.imdb.com/title/tt0118799/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_26
## 29 http://www.imdb.com/title/tt0120815/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_29
## 31 http://www.imdb.com/title/tt0120586/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_31
## 65 http://www.imdb.com/title/tt0119698/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_65
## 97 http://www.imdb.com/title/tt0119488/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_97
## 107 http://www.imdb.com/title/tt0119217/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_107
## 118 http://www.imdb.com/title/tt0118849/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_118
## 136 http://www.imdb.com/title/tt0120735/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_136
## 151 http://www.imdb.com/title/tt0118715/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_151
## 155 http://www.imdb.com/title/tt0117951/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_155
## 157 http://www.imdb.com/title/tt0116282/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_157
## 206 http://www.imdb.com/title/tt0120382/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_206
## movie.director
## 26 Roberto Benigni
## 29 Steven Spielberg
## 31 Tony Kaye
## 65 Hayao Miyazaki
## 97 Curtis Hanson
## 107 Gus Van Sant
## 118 Majid Majidi
## 136 Guy Ritchie
## 151 Joel Coen, Ethan Coen
## 155 Danny Boyle
## 157 Joel Coen, Ethan Coen
## 206 Peter Weir
## movie.tagline
## 26 An unforgettable fable that proves love, family and imagination conquer all.
## 29 In the Last Great Invasion of the Last Great War, The Greatest Danger for Eight Men was Saving... One.
## 31 Violence as a way of life.
## 65 The Fate Of The World Rests On The Courage Of One Warrior.
## 97 It's a crime saga that will shock you. It's a mystery that will keep you guessing. It's a thriller that will keep you riveted.
## 107 Some people can never believe in themselves, until someone believes in them.
## 118 A Little Secret...Their Biggest Adventure!
## 136 A Disgrace to Criminals Everywhere.
## 151 (Israel, translated from Hebrew): Lebowski: Not a man, a way of life
## 155 Choose life. Choose a job. Choose a starter home. Choose dental insurance, leisure wear and matching luggage. Choose your future. But why would anyone want to do a thing like that?
## 157 An ordinary place, an extraordinary thriller
## 206 The Story Of A Lifetime
## movie.star
## 26 Roberto Benigni, Nicoletta Braschi, Giorgio Cantarini
## 29 Tom Hanks, Matt Damon, Tom Sizemore
## 31 Edward Norton, Edward Furlong, Beverly D'Angelo
## 65 Yôji Matsuda, Yuriko Ishida, Yûko Tanaka
## 97 Kevin Spacey, Russell Crowe, Guy Pearce
## 107 Robin Williams, Matt Damon, Ben Affleck
## 118 Mohammad Amir Naji, Amir Farrokh Hashemian, Bahare Seddiqi
## 136 Jason Flemyng, Dexter Fletcher, Nick Moran
## 151 Jeff Bridges, John Goodman, Julianne Moore
## 155 Ewan McGregor, Ewen Bremner, Jonny Lee Miller
## 157 William H. Macy, Frances McDormand, Steve Buscemi
## 206 Jim Carrey, Ed Harris, Laura Linney
## movie.genre
## 26 Comedy,Drama,War
## 29 Action,Drama,War
## 31 Crime,Drama
## 65 Animation,Adventure,Fantasy
## 97 Crime,Drama,Mystery,Thriller
## 107 Drama
## 118 Drama,Family
## 136 Comedy,Crime
## 151 Comedy,Crime
## 155 Drama
## 157 Crime,Drama,Thriller
## 206 Comedy,Drama,Sci-Fi
## movie.storyline
## 26 In 1930s Italy, a carefree Jewish book keeper named Guido starts a fairy tale life by courting and marrying a lovely woman from a nearby city. Guido and his wife have a son and live happily together until the occupation of Italy by German forces. In an attempt to hold his family together and help his son survive the horrors of a Jewish Concentration Camp, Guido imagines that the Holocaust is a game and that the grand prize for winning is a tank. Written by Anthony Hughes <husnock31@hotmail.com>
## 29 Opening with the Allied invasion of Normandy on 6 June 1944, members of the 2nd Ranger Battalion under Cpt. Miller fight ashore to secure a beachhead. Amidst the fighting, two brothers are killed in action. Earlier in New Guinea, a third brother is KIA. Their mother, Mrs. Ryan, is to receive all three of the grave telegrams on the same day. The United States Army Chief of Staff, George C. Marshall, is given an opportunity to alleviate some of her grief when he learns of a fourth brother, Private James Ryan, and decides to send out 8 men (Cpt. Miller and select members from 2nd Rangers) to find him and bring him back home to his mother... Written by J.Zelman
## 31 Derek Vineyard is paroled after serving 3 years in prison for killing two thugs who tried to break into/steal his truck. Through his brother, Danny Vineyard's narration, we learn that before going to prison, Derek was a skinhead and the leader of a violent white supremacist gang that committed acts of racial crime throughout L.A. and his actions greatly influenced Danny. Reformed and fresh out of prison, Derek severs contact with the gang and becomes determined to keep Danny from going down the same violent path as he did. Written by Nitesh D.(nmxpa7@msn.com)
## 65 While protecting his village from rampaging boar-god/demon, a confident young warrior, Ashitaka, is stricken by a deadly curse. To save his life, he must journey to the forests of the west. Once there, he's embroiled in a fierce campaign that humans were waging on the forest. The ambitious Lady Eboshi and her loyal clan use their guns against the gods of the forest and a brave young woman, Princess Mononoke, who was raised by a wolf-god. Ashitaka sees the good in both sides and tries to stem the flood of blood. This is met be animosity by both sides as they each see him as supporting the enemy. Written by Christopher Taguchi
## 97 1950's Los Angeles is the seedy backdrop for this intricate noir-ish tale of police corruption and Hollywood sleaze. Three very different cops are all after the truth, each in their own style: Ed Exley, the golden boy of the police force, willing to do almost anything to get ahead, except sell out; Bud White, ready to break the rules to seek justice, but barely able to keep his raging violence under control; and Jack Vincennes, always looking for celebrity and a quick buck until his conscience drives him to join Exley and White down the one-way path to find the truth behind the dark world of L.A. crime. Written by Greg Bole <bole@life.bio.sunysb.edu>
## 107 A touching tale of a wayward young man who struggles to find his identity, living in a world where he can solve any problem, except the one brewing deep within himself, until one day he meets his soul mate who opens his mind and his heart. Written by Dima & Danielle
## 118 Zahra's shoes are gone; her older brother Ali lost them. They are poor, there are no shoes for Zahra until they come up with an idea: they will share one pair of shoes, Ali's. School awaits. Will the plan succeed? Written by Eileen Berdon <eberdon@aol.com>
## 136 Four Jack-the-lads find themselves heavily - seriously heavily - in debt to an East End hard man and his enforcers after a crooked card game. Overhearing their neighbours in the next flat plotting to hold up a group of out-of-their-depth drug growers, our heros decide to stitch up the robbers in turn. In a way the confusion really starts when a pair of antique double-barrelled shotguns go missing in a completely different scam. Written by Anonymous
## 151 When "The Dude" Lebowski is mistaken for a millionaire Lebowski, two thugs urinate on his rug to coerce him into paying a debt he knows nothing about. While attempting to gain recompense for the ruined rug from his wealthy counterpart, he accepts a one-time job with high pay-off. He enlists the help of his bowling buddy, Walter, a gun-toting Jewish-convert with anger issues. Deception leads to more trouble, and it soon seems that everyone from porn empire tycoons to nihilists want something from The Dude. Written by J. Lake
## 155 A wild, freeform, Rabelaisian trip through the darkest recesses of Edinburgh low-life, focusing on Mark Renton and his attempt to give up his heroin habit, and how the latter affects his relationship with family and friends: Sean Connery wannabe Sick Boy, dimbulb Spud, psycho Begbie, 14-year-old girlfriend Diane, and clean-cut athlete Tommy, who's never touched drugs but can't help being curious about them... Written by Michael Brooke <michael@everyman.demon.co.uk>
## 157 Jerry works in his father-in-law's car dealership and has gotten himself in financial problems. He tries various schemes to come up with money needed for a reason that is never really explained. It has to be assumed that his huge embezzlement of money from the dealership is about to be discovered by father-in-law. When all else falls through, plans he set in motion earlier for two men to kidnap his wife for ransom to be paid by her wealthy father (who doesn't seem to have the time of day for son-in-law). From the moment of the kidnapping, things go wrong and what was supposed to be a non-violent affair turns bloody with more blood added by the minute. Jerry is upset at the bloodshed, which turns loose a pregnant sheriff from Brainerd, MN who is tenacious in attempting to solve the three murders in her jurisdiction. Written by Anonymous
## 206 In this movie, Truman is a man whose life is a fake one... The place he lives is in fact a big studio with hidden cameras everywhere, and all his friends and people around him, are actors who play their roles in the most popular TV-series in the world: The Truman Show. Truman thinks that he is an ordinary man with an ordinary life and has no idea about how he is exploited. Until one day... he finds out everything. Will he react? Written by Chris Makrozahopoulos <makzax@hotmail.com>
## movie.budget movie.gross
## 26 $20,000,000, $57,598,247
## 29 $70,000,000, $216,119,491
## 31 $7,500,000, $6,712,241
## 65 JPY 2,400,000,000 $2,298,191
## 97 $35,000,000, $64,604,977
## 107 $10,000,000, $138,339,411
## 118 $180,000, $925,402
## 136 NA $3,650,677
## 151 $15,000,000 $17,439,163
## 155 $3,500,000, $16,501,785
## 157 $7,000,000, $25,882,374
## 206 $60,000,000, $125,603,360
# Save the complete table (without a row-name column) and preview its
# first rows.
write.csv(imdbtop250, file = "IMDB_Top_250.csv", row.names = FALSE)
head(imdbtop250)
## MovieName ReleaseYear Movie.Director
## 1 The Shawshank Redemption 1994 Frank Darabont
## 2 The Godfather 1972 Francis Ford Coppola
## 3 The Godfather: Part II 1974 Francis Ford Coppola
## 4 The Dark Knight 2008 Christopher Nolan
## 5 12 Angry Men 1957 Sidney Lumet
## 6 Schindler's List 1993 Steven Spielberg
## Movie.Star
## 1 Tim Robbins, Morgan Freeman, Bob Gunton
## 2 Marlon Brando, Al Pacino, James Caan
## 3 Al Pacino, Robert De Niro, Robert Duvall
## 4 Christian Bale, Heath Ledger, Aaron Eckhart
## 5 Henry Fonda, Lee J. Cobb, Martin Balsam
## 6 Liam Neeson, Ralph Fiennes, Ben Kingsley
## Movie.Genre
## 1 Crime,Drama
## 2 Crime,Drama
## 3 Crime,Drama
## 4 Action,Crime,Drama,Thriller
## 5 Crime,Drama
## 6 Biography,Drama,History
## Movie.Tagline
## 1 Fear can hold you prisoner. Hope can set you free.
## 2 The Godfather is now a movie.
## 3 NA
## 4 Welcome to a world without rules.
## 5 ...it explodes like twelve sticks of dynamite!
## 6 Whoever saves one life, saves the world entire.
## Movie.Storyline
## 1 Chronicles the experiences of a formerly successful banker as a prisoner in the gloomy jailhouse of Shawshank after being found guilty of a crime he did not commit. The film portrays the man's unique way of dealing with his new, torturous life; along the way he befriends a number of fellow prisoners, most notably a wise long-term inmate named Red. Written by J-S-Golden
## 2 When the aging head of a famous crime family decides to transfer his position to one of his subalterns, a series of unfortunate events start happening to the family, and a war begins between all the well-known families leading to insolence, deportation, murder and revenge, and ends with the favorable successor being finally chosen. Written by J. S. Golden
## 3 The continuing saga of the Corleone crime family tells the story of a young Vito Corleone growing up in Sicily and in 1910s New York; and follows Michael Corleone in the 1950s as he attempts to expand the family business into Las Vegas, Hollywood and Cuba. Written by Keith Loh <loh@sfu.ca>
## 4 Set within a year after the events of Batman Begins, Batman, Lieutenant James Gordon, and new district attorney Harvey Dent successfully begin to round up the criminals that plague Gotham City until a mysterious and sadistic criminal mastermind known only as the Joker appears in Gotham, creating a new wave of chaos. Batman's struggle against the Joker becomes deeply personal, forcing him to "confront everything he believes" and improve his technology to stop him. A love triangle develops between Bruce Wayne, Dent and Rachel Dawes. Written by Leon Lombardi
## 5 The defense and the prosecution have rested and the jury is filing into the jury room to decide if a young man is guilty or innocent of murdering his father. What begins as an open-and-shut case of murder soon becomes a detective story that presents a succession of clues creating doubt, and a mini-drama of each of the jurors' prejudices and preconceptions about the trial, the accused, and each other. Based on the play, all of the action takes place on the stage of the jury room. Written by pjk <PETESID@VNET.IBM.COM>
## 6 Oskar Schindler is a vainglorious and greedy German businessman who becomes an unlikely humanitarian amid the barbaric German Nazi reign when he feels compelled to turn his factory into a refuge for Jews. Based on the true story of Oskar Schindler who managed to save about 1100 Jews from being gassed at the Auschwitz concentration camp, it is a testament to the good in all of us. Written by Harald Mayr <marvin@bike.augusta.de>
## Movie.Budget Movie.Gross
## 1 $25,000,000 $28,341,469
## 2 $6,000,000, $134,821,952
## 3 $13,000,000 $57,300,000
## 4 $185,000,000, $533,316,061
## 5 $350,000 NA
## 6 $22,000,000, $96,067,179
## Movie.Link
## 1 http://www.imdb.com/title/tt0111161/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_1
## 2 http://www.imdb.com/title/tt0068646/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_2
## 3 http://www.imdb.com/title/tt0071562/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_3
## 4 http://www.imdb.com/title/tt0468569/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_4
## 5 http://www.imdb.com/title/tt0050083/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_5
## 6 http://www.imdb.com/title/tt0108052/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=0R7MD7TVA6C9WRZHYWCM&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_6
# Genre indicator table: one row per movie, one column per genre. A cell is 1
# when the movie carries that genre and stays NA otherwise.
genre <- data.frame(
  Movie = character(), Crime = integer(), Drama = integer(),
  Action = integer(), Thriller = integer(), Biography = integer(),
  History = integer(), Adventure = integer(), Fantasy = integer(),
  Western = integer(), SciFi = integer(), Comedy = integer(),
  Mystery = integer(), Family = integer(), War = integer(),
  Animation = integer(), Romance = integer(), Horror = integer(),
  Music = integer(), Musical = integer(), FilmNoir = integer(),
  Sport = integer(),
  stringsAsFactors = FALSE
)

# Genre columns only; "Movie" is excluded so a tag can never overwrite the name.
genre.columns <- colnames(genre)[-1]

# Iterate over the movies actually scraped instead of a hard-coded 250.
for (r in seq_along(movie.name)) {
  genre[r, 1] <- movie.name[r]
  # Strip whitespace and hyphens so "Sci-Fi" / "Film-Noir" line up with the
  # SciFi / FilmNoir column names.
  tags <- gsub("\\s|-", "", unlist(movie.genre[r]))
  # Exact name matching: a substring search would also flag "Musical" for
  # every movie tagged "Music". Tags without a column are ignored.
  hits <- intersect(tags, genre.columns)
  if (length(hits) > 0) {
    genre[r, hits] <- 1L
  }
}
write.csv(genre, "genre.csv", row.names = FALSE)
head(genre)
## Movie Crime Drama Action Thriller Biography History
## 1 The Shawshank Redemption 1 1 NA NA NA NA
## 2 The Godfather 1 1 NA NA NA NA
## 3 The Godfather: Part II 1 1 NA NA NA NA
## 4 The Dark Knight 1 1 1 1 NA NA
## 5 12 Angry Men 1 1 NA NA NA NA
## 6 Schindler's List NA 1 NA NA 1 1
## Adventure Fantasy Western SciFi Comedy Mystery Family War Animation
## 1 NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA
## Romance Horror Music Musical FilmNoir Sport
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA
# Number of movies per genre: sum every indicator column (columns 2 to 22,
# i.e. all but the leading Movie column), ignoring the NA cells.
colSums(genre[, 2:22], na.rm = TRUE)
## Crime Drama Action Thriller Biography History Adventure
## 56 176 36 63 26 17 62
## Fantasy Western SciFi Comedy Mystery Family War
## 33 10 31 42 36 24 30
## Animation Romance Horror Music Musical FilmNoir Sport
## 20 24 5 2 7 7 7
Drama, Thriller, Adventure, and Crime are the most common genres on the IMDB Top 250 list. Music and Horror are the least common genres on the list.