Extracting Keywords for Movies on IMDb

library(textreadr)
# IMDb keywords page to scrape.
thisfile <- "https://www.imdb.com/title/tt0112462/keywords?ref_=tt_ql_stry_4"

# Download the page text, keeping every line from the top (skip = 0) and
# dropping empty elements.
thisfile_Data <- textreadr::read_html(
  thisfile,
  skip = 0,
  remove.empty = TRUE
)

Extract Keyword List

# Isolate the keyword entries: the lines strictly between the first
# "plot keywords" line and the next "See also" line of the page text.
start_hits <- grep("plot keywords", thisfile_Data, fixed = TRUE)
if (length(start_hits) == 0L) {
  stop("Marker \"plot keywords\" not found in the page text.", call. = FALSE)
}
# grep() returns every matching index; anchor on the first one only so the
# range below is built from scalars.
Start_point <- start_hits[1L] + 1L

end_hits <- grep("See also", thisfile_Data, fixed = TRUE)
# Only a "See also" line after the start marker can close the list.
end_hits <- end_hits[end_hits >= Start_point]
if (length(end_hits) == 0L) {
  stop(
    "Marker \"See also\" not found after \"plot keywords\".",
    call. = FALSE
  )
}
End_point <- end_hits[1L] - 1L

# seq_len() gives an empty selection when no lines lie between the markers,
# whereas Start_point:End_point would count backwards and pick wrong lines.
keep_idx <- Start_point - 1L + seq_len(End_point - Start_point + 1L)
thisfile_Data <- thisfile_Data[keep_idx]

Remove relevance-voting phrases that are not keywords

# Drop the exact relevance-voting strings that appear among the keywords.
# Note: setdiff() also collapses duplicated entries of thisfile_Data.
thisfile_Data <- setdiff(
  thisfile_Data,
  c("Yes", "No", "Is this relevant?", "Relevant?")
)

# Drop every line containing "found this relevant". A logical mask is used
# instead of x[-grep(...)]: when nothing matches, grep() returns integer(0)
# and negative indexing with it would return an empty vector, discarding all
# keywords.
thisfile_Data <- thisfile_Data[
  !grepl("found this relevant", thisfile_Data, fixed = TRUE)
]

List of Keywords

# Auto-print the remaining entries of thisfile_Data (the keyword list).
thisfile_Data
##   [1] "batman character"                        
##   [2] "bruce wayne character"                   
##   [3] "alfred pennyworth character"             
##   [4] "harvey dent character"                   
##   [5] "james gordon character"                  
##   [6] "dick grayson character"                  
##   [7] "two face character"                      
##   [8] "robin character"                         
##   [9] "riddler character"                       
##  [10] "batcave"                                 
##  [11] "wayne manor"                             
##  [12] "villain team up"                         
##  [13] "gotham city"                             
##  [14] "bat signal"                              
##  [15] "batboat"                                 
##  [16] "superhero action"                        
##  [17] "love"                                    
##  [18] "psychologist"                            
##  [19] "circus"                                  
##  [20] "partner"                                 
##  [21] "millionaire"                             
##  [22] "disfigurement"                           
##  [23] "batwing"                                 
##  [24] "rock music"                              
##  [25] "outlaw"                                  
##  [26] "gang leader"                             
##  [27] "trap"                                    
##  [28] "sympathy"                                
##  [29] "multiple cameos"                         
##  [30] "balladeer"                               
##  [31] "singer offscreen"                        
##  [32] "date"                                    
##  [33] "gang that lives together"                
##  [34] "outlaw gang"                             
##  [35] "boyfriend girlfriend relationship"       
##  [36] "batmobile"                               
##  [37] "female boxer"                            
##  [38] "1990s"                                   
##  [39] "superhero"                               
##  [40] "bat"                                     
##  [41] "halloween"                               
##  [42] "trapeze"                                 
##  [43] "returning character with different actor"
##  [44] "asylum"                                  
##  [45] "butler"                                  
##  [46] "based on comic book"                     
##  [47] "secret identity"                         
##  [48] "sequel"                                  
##  [49] "dc comics"                               
##  [50] "gadget car"                              
##  [51] "mind reading"                            
##  [52] "martha wayne character"                  
##  [53] "thomas wayne character"                  
##  [54] "the mayor character"                     
##  [55] "troubled production"                     
##  [56] "time bomb"                               
##  [57] "heroine"                                 
##  [58] "evil man"                                
##  [59] "villain"                                 
##  [60] "bound and gagged"                        
##  [61] "tape gag"                                
##  [62] "secret revealed"                         
##  [63] "tape over mouth"                         
##  [64] "psychotronic film"                       
##  [65] "man wears eyeglasses"                    
##  [66] "man tied up"                             
##  [67] "woman tied up"                           
##  [68] "legion of doom"                          
##  [69] "male protagonist"                        
##  [70] "altered version of warner bros. logo"    
##  [71] "death trap"                              
##  [72] "candy cinema"                            
##  [73] "part of tetralogy"                       
##  [74] "rescue from drowning"                    
##  [75] "gender in title"                         
##  [76] "superhero adventure"                     
##  [77] "timeframe 1990s"                         
##  [78] "animal in title"                         
##  [79] "supervillain origin"                     
##  [80] "billionaire"                             
##  [81] "title directed by male"                  
##  [82] "animal themed superhero"                 
##  [83] "flipping a coin"                         
##  [84] "reference to god"                        
##  [85] "anniversary"                             
##  [86] "pretty woman"                            
##  [87] "multiple personalities"                  
##  [88] "long hair"                               
##  [89] "finishing someone's sentence"            
##  [90] "reference to open sesame incantation"    
##  [91] "reference to a thousand and one nights"  
##  [92] "hearing aid"                             
##  [93] "employer murders employee"               
##  [94] "evil laughter"                           
##  [95] "character says oh my god"                
##  [96] "glasses"                                 
##  [97] "mind manipulation"                       
##  [98] "flaw exploitation"                       
##  [99] "black suit"                              
## [100] "painted nails"                           
## [101] "painted fingernails"                     
## [102] "woman groping a man"                     
## [103] "groping"                                 
## [104] "red nails"                               
## [105] "woman hitting on a man"                  
## [106] "man likes strong women"                  
## [107] "reference to catwoman"                   
## [108] "reference to a whip"                     
## [109] "false alarm"                             
## [110] "human guinea pig"                        
## [111] "human testing"                           
## [112] "reference to sigmund freud"              
## [113] "calling someone baby"                    
## [114] "calling someone babe"                    
## [115] "employee murders boss"                   
## [116] "brain damage"                            
## [117] "forged suicide note"                     
## [118] "suicide note"                            
## [119] "manor"                                   
## [120] "breaking down a door"                    
## [121] "rorschach inkblot"                       
## [122] "hippodrome"                              
## [123] "character says bring it on"              
## [124] "supervillain"                            
## [125] "flying graysons"                         
## [126] "offscreen death"                         
## [127] "camera focus on corpses"                 
## [128] "reference to metropolis"                 
## [129] "orphan boy"                              
## [130] "car collection"                          
## [131] "childhood flashback"                     
## [132] "burning car"                             
## [133] "michelangelo's david"                    
## [134] "duality"                                 
## [135] "carbon based life form"                  
## [136] "jewel heist"                             
## [137] "barefoot"                                
## [138] "barefoot boy"                            
## [139] "camera shot of feet"                     
## [140] "robbing a casino"                        
## [141] "reference to bluebeard"                  
## [142] "joyride"                                 
## [143] "character asks who the hell are you"     
## [144] "calling someone a bastard"               
## [145] "vengeance"                               
## [146] "reference to buckingham palace"          
## [147] "character says screw you"                
## [148] "sleeping woman"                          
## [149] "smile"                                   
## [150] "smiling man"                             
## [151] "hidden smile"                            
## [152] "considering retirement"                  
## [153] "loneliness"                              
## [154] "feeling alone"                           
## [155] "october 31st"                            
## [156] "october"                                 
## [157] "autumn"                                  
## [158] "character says old habits die hard"      
## [159] "camera shot of legs"                     
## [160] "full moon"                               
## [161] "character says see you in hell"          
## [162] "character says a god am i"               
## [163] "arkham asylum"                           
## [164] "black uniform"                           
## [165] "neo noir"                                
## [166] "boy in jeopardy"                         
## [167] "telling someone to shut up"              
## [168] "corpse"                                  
## [169] "dating"                                  
## [170] "urban gothic"                            
## [171] "city"                                    
## [172] "chase"                                   
## [173] "urban setting"                           
## [174] "knocked unconscious"                     
## [175] "woman with long hair"                    
## [176] "man dresses in black"                    
## [177] "abduction"                               
## [178] "alter ego"                               
## [179] "subway"                                  
## [180] "memory"                                  
## [181] "trick or treater"                        
## [182] "gothic"                                  
## [183] "hero sidekick relationship"              
## [184] "hero villain relationship"               
## [185] "heroism"                                 
## [186] "bad guy"                                 
## [187] "evil"                                    
## [188] "two faced"                               
## [189] "repeat sequel"                           
## [190] "villa"                                   
## [191] "airplane"                                
## [192] "manor house"                             
## [193] "stand alone sequel"                      
## [194] "dual identity"                           
## [195] "impulsiveness"                           
## [196] "hero in jeopardy"                        
## [197] "trick or treating"                       
## [198] "armor"                                   
## [199] "body armor"                              
## [200] "domino mask"                             
## [201] "caped superhero"                         
## [202] "recording the villain"                   
## [203] "duo"                                     
## [204] "masked crime fighter"                    
## [205] "crime fighting"                          
## [206] "villainess"                              
## [207] "slimehouse"                              
## [208] "cult film"                               
## [209] "gotham"                                  
## [210] "acrobat"                                 
## [211] "acid"                                    
## [212] "revenge"                                 
## [213] "district attorney"                       
## [214] "sidekick"                                
## [215] "dream"                                   
## [216] "criminal"                                
## [217] "riddle"                                  
## [218] "dark comedy"                             
## [219] "sequel to cult film"                     
## [220] "underwater scene"                        
## [221] "cosmetic mole"                           
## [222] "cut and paste note"                      
## [223] "opera gloves"                            
## [224] "friends who live together"               
## [225] "confidence"                              
## [226] "tank top"                                
## [227] "two word title"                          
## [228] "punching bag"                            
## [229] "cartoon on tv"                           
## [230] "punched in the face"                     
## [231] "money"                                   
## [232] "acid thrown in face"                     
## [233] "glove"                                   
## [234] "burned with acid"                        
## [235] "comic book hero"                         
## [236] "criminal mastermind"                     
## [237] "net"                                     
## [238] "airplane crash"                          
## [239] "ejection seat"                           
## [240] "exploding boat"                          
## [241] "boat"                                    
## [242] "one man army"                            
## [243] "kidnapping"                              
## [244] "falling down stairs"                     
## [245] "hand grenade"                            
## [246] "home invasion"                           
## [247] "fireplace"                               
## [248] "neck breaking"                           
## [249] "grenade launcher"                        
## [250] "subway station"                          
## [251] "jumping from height"                     
## [252] "chandelier"                              
## [253] "gala"                                    
## [254] "party"                                   
## [255] "cane"                                    
## [256] "coffin"                                  
## [257] "rose"                                    
## [258] "church"                                  
## [259] "funeral"                                 
## [260] "dutch angle"                             
## [261] "deception"                               
## [262] "press conference"                        
## [263] "mayor"                                   
## [264] "jewelry store"                           
## [265] "diamond"                                 
## [266] "hidden room"                             
## [267] "held at gunpoint"                        
## [268] "secret hideout"                          
## [269] "car crash"                               
## [270] "rocket launcher"                         
## [271] "dark past"                               
## [272] "dark hero"                               
## [273] "tragic hero"                             
## [274] "gatling gun"                             
## [275] "motorcycle"                              
## [276] "biker"                                   
## [277] "armored car"                             
## [278] "car accident"                            
## [279] "baseball bat"                            
## [280] "underwater explosion"                    
## [281] "bomb"                                    
## [282] "murder of a family"                      
## [283] "death of family"                         
## [284] "security camera"                         
## [285] "surveillance"                            
## [286] "cover up"                                
## [287] "hologram"                                
## [288] "mind control"                            
## [289] "mad scientist"                           
## [290] "kicking in a door"                       
## [291] "doll"                                    
## [292] "news reporter"                           
## [293] "media coverage"                          
## [294] "news report"                             
## [295] "statue"                                  
## [296] "cape"                                    
## [297] "good versus evil"                        
## [298] "warrior"                                 
## [299] "anti hero"                               
## [300] "costumed hero"                           
## [301] "costume"                                 
## [302] "psychopath"                              
## [303] "henchwoman"                              
## [304] "henchman"                                
## [305] "police commissioner"                     
## [306] "helicopter crash"                        
## [307] "escape"                                  
## [308] "tommy gun"                               
## [309] "bank vault"                              
## [310] "duct tape over mouth"                    
## [311] "tied up"                                 
## [312] "robbery"                                 
## [313] "bank"                                    
## [314] "blood"                                   
## [315] "opening action scene"                    
## [316] "blood splatter"                          
## [317] "kiss"                                    
## [318] "party crashing"                          
## [319] "showdown"                                
## [320] "submarine"                               
## [321] "glider"                                  
## [322] "gunshot wound"                           
## [323] "elevator"                                
## [324] "claw"                                    
## [325] "revolver"                                
## [326] "shot in the head"                        
## [327] "semiautomatic pistol"                    
## [328] "pistol"                                  
## [329] "fistfight"                               
## [330] "brawl"                                   
## [331] "rescue"                                  
## [332] "machine gun"                             
## [333] "vigilante"                               
## [334] "masked vigilante"                        
## [335] "masked hero"                             
## [336] "stylized violence"                       
## [337] "martial arts"                            
## [338] "gadget"                                  
## [339] "cleavage"                                
## [340] "seduction"                               
## [341] "attempted seduction"                     
## [342] "blonde"                                  
## [343] "hand to hand combat"                     
## [344] "kung fu"                                 
## [345] "stick fight"                             
## [346] "two man army"                            
## [347] "action hero"                             
## [348] "hero"                                    
## [349] "damsel in distress"                      
## [350] "tough guy"                               
## [351] "face paint"                              
## [352] "violence"                                
## [353] "slow motion scene"                       
## [354] "lifting a male into the air"             
## [355] "subjective camera"                       
## [356] "lifting an adult into the air"           
## [357] "insane asylum"                           
## [358] "casino"                                  
## [359] "car chase"                               
## [360] "explosion"                               
## [361] "orphan"                                  
## [362] "mansion"                                 
## [363] "tnt"                                     
## [364] "obsession"                               
## [365] "murder"                                  
## [366] "experiment"                              
## [367] "statue of liberty new york city"         
## [368] "bank robbery"                            
## [369] "hostage"                                 
## [370] "exploding helicopter"                    
## [371] "exploding car"                           
## [372] "flashback"                               
## [373] "lifting someone into the air"            
## [374] "concept car"                             
## [375] "coin flipping in the air"                
## [376] "abnormal psychology"                     
## [377] "altered version of studio logo"          
## [378] "buddy"                                   
## [379] "shot to death"                           
## [380] "falling to death"                        
## [381] "black comedy"                            
## [382] "split personality"                       
## [383] "mission"                                 
## [384] "insanity"                                
## [385] "haunted by the past"                     
## [386] "double life"                             
## [387] "disfigured face"                         
## [388] "crime fighter"                           
## [389] "third part"                              
## [390] "coin tossing"                            
## [391] "loss of mother"                          
## [392] "loss of father"                          
## [393] "loss of brother"                         
## [394] "security guard"                          
## [395] "repressed memory"                        
## [396] "theremin"                                
## [397] "falling from height"                     
## [398] "fictional city"                          
## [399] "blockbuster"                             
## [400] "super villain"                           
## [401] "inkblot"                                 
## [402] "father figure"                           
## [403] "mask"                                    
## [404] "helicopter"                              
## [405] "forensic psychologist"                   
## [406] "genius"                                  
## [407] "street gang"                             
## [408] "character name in title"                 
## [409] "surprise ending"