# Set the LANG environment variable to "en" for this R session
# (presumably so that messages and warnings are reported in English -- confirm).
Sys.setenv(LANG = "en")
library("tm") 
## Loading required package: NLP
library("SnowballC")
library("wordcloud")
## Loading required package: RColorBrewer
library("RColorBrewer")
# Read the script file line by line and wrap the lines in a tm corpus.
script_path <- "~/Documents/Thor.Ragnarok.txt"
text <- readLines(script_path, warn = FALSE, encoding = "UTF-8")
docs <- Corpus(VectorSource(text))

# Cleaning steps, applied to the corpus in exactly this order:
#   1. convert to lower case
#   2. remove numbers
#   3. remove English stop words
#   4. remove punctuation
#   5. strip extra whitespace
cleaning_steps <- list(
  content_transformer(tolower),
  removeNumbers,
  function(x) removeWords(x, stopwords("english")),
  removePunctuation,
  stripWhitespace
)
for (cleaning_step in cleaning_steps) {
  docs <- tm_map(docs, cleaning_step)
}
## Recorded run: each of the five tm_map() calls above emitted
## "Warning in tm_map.SimpleCorpus(docs, ...): transformation drops documents".
# Build the term-document matrix (terms in rows, documents in columns).
# NOTE: despite its name, `dtm` holds a TermDocumentMatrix.
dtm <- TermDocumentMatrix(docs)

# Densify so that rowSums() gives the total count of every term.
m <- as.matrix(dtm)
term_totals <- rowSums(m)

# Most frequent terms first.
v <- sort(term_totals, decreasing = TRUE)

# One row per term, with the term itself as the row name.
d <- data.frame(
  word = names(v),
  freq = unname(v),
  row.names = names(v)
)

# Show the 100 most frequent terms.
head(d, n = 100)
##                    word freq
## like               like   56
## just               just   52
## know               know   49
## asgard           asgard   45
## one                 one   44
## get                 get   39
## back               back   35
## need               need   35
## see                 see   33
## hulk               hulk   31
## now                 now   30
## come               come   29
## right             right   29
## yeah               yeah   28
## can                 can   28
## help               help   28
## hey                 hey   26
## thor               thor   25
## time               time   24
## okay               okay   23
## gonna             gonna   22
## people           people   22
## really           really   21
## going             going   21
## loki               loki   20
## good               good   20
## banner           banner   20
## big                 big   19
## stop               stop   19
## place             place   19
## yes                 yes   19
## thunder         thunder   19
## sorry             sorry   18
## odin               odin   17
## will               will   17
## well               well   16
## think             think   16
## stay               stay   16
## brother         brother   16
## look               look   15
## say                 say   15
## father           father   14
## great             great   14
## tell               tell   14
## even               even   14
## away               away   14
## thing             thing   14
## god                 god   14
## grandmaster grandmaster   14
## never             never   14
## thought         thought   13
## take               take   13
## wanna             wanna   13
## friend           friend   13
## earth             earth   12
## home               home   12
## throne           throne   12
## two                 two   12
## hammer           hammer   12
## alive             alive   11
## ragnarok       ragnarok   11
## wait               wait   11
## listen           listen   11
## thank             thank   11
## got                 got   11
## always           always   11
## man                 man   11
## please           please   11
## suns               suns   11
## planet           planet   10
## death             death   10
## still             still   10
## let                 let   10
## fight             fight   10
## heimdall       heimdall   10
## done               done   10
## best               best   10
## hela               hela   10
## love               love   10
## sakaar           sakaar   10
## bad                 bad   10
## ship               ship   10
## die                 die    9
## feel               feel    9
## crown             crown    9
## asgards         asgards    9
## sword             sword    9
## power             power    9
## looks             looks    9
## access           access    9
## together       together    9
## many               many    9
## nothing         nothing    9
## fine               fine    9
## something     something    9
## champion       champion    9
## valkyrie       valkyrie    9
## getting         getting    9
## way                 way    8
## fire               fire    8
# Fix the RNG seed so the randomised word placement is reproducible.
set.seed(1234)

# Word cloud of every term that occurs at least 5 times (no cap on the number
# of words), placed in random order, with half of the words rotated.
#
# `scale` narrows the font-size range from the package default. In the
# recorded run with the default range, several words -- including the
# high-frequency "asgard" -- were dropped with "could not be fit on page".
# NOTE(review): whether every word now fits still depends on the size of the
# plot device; shrink `scale` further or enlarge the device if warnings remain.
wordcloud(words = d$word, freq = d$freq, min.freq = 5,
          max.words = Inf, random.order = TRUE, rot.per = 0.5,
          scale = c(3, 0.3),
          colors = brewer.pal(8, "Dark2"))
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : asgard could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : getting could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : need could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : time could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : will could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : done could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : good could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : father could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : stupid could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : sorry could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : yeah could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : smash could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : grandmaster could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : friends could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : home could not be fit on page. It will not be plotted.

# List every term whose total frequency is at least 5 (no upper bound).
findFreqTerms(x = dtm, lowfreq = 5, highfreq = Inf)
##   [1] "know"        "now"         "thinking"    "get"         "well"       
##   [6] "answer"      "just"        "long"        "earth"       "see"        
##  [11] "time"        "planet"      "things"      "death"       "way"        
##  [16] "much"        "think"       "odin"        "son"         "thor"       
##  [21] "surtur"      "alive"       "still"       "ago"         "father"     
##  [26] "thought"     "years"       "die"         "home"        "asgard"     
##  [31] "ragnarok"    "great"       "around"      "back"        "feel"       
##  [36] "like"        "really"      "okay"        "tell"        "come"       
##  [41] "crown"       "eternal"     "flame"       "shall"       "will"       
##  [46] "asgards"     "sword"       "give"        "even"        "sorry"      
##  [51] "let"         "going"       "put"         "big"         "away"       
##  [56] "throne"      "power"       "stop"        "thing"       "already"    
##  [61] "honest"      "fire"        "quite"       "gonna"       "looks"      
##  [66] "fight"       "right"       "wait"        "made"        "make"       
##  [71] "everything"  "work"        "heimdall"    "job"         "access"     
##  [76] "bifrost"     "nine"        "realms"      "mean"        "mine"       
##  [81] "stuff"       "called"      "place"       "together"    "stay"       
##  [86] "look"        "one"         "yeah"        "can"         "guy"        
##  [91] "sure"        "hell"        "brother"     "take"        "listen"     
##  [96] "help"        "done"        "loki"        "life"        "brought"    
## [101] "found"       "many"        "day"         "little"      "people"     
## [106] "better"      "though"      "maybe"       "turn"        "thank"      
## [111] "every"       "best"        "yes"         "course"      "nothing"    
## [116] "face"        "gone"        "fine"        "without"     "kill"       
## [121] "hey"         "god"         "thunder"     "keep"        "may"        
## [126] "bring"       "call"        "said"        "need"        "hair"       
## [131] "something"   "forget"      "good"        "kind"        "end"        
## [136] "coming"      "talking"     "goddess"     "hela"        "sister"     
## [141] "whatever"    "must"        "love"        "want"        "got"        
## [146] "odins"       "dead"        "welcome"     "last"        "dying"      
## [151] "palace"      "sakaar"      "lost"        "grandmaster" "first"      
## [156] "always"      "say"         "champion"    "man"         "wanna"      
## [161] "please"      "happened"    "friend"      "friends"     "two"        
## [166] "stupid"      "real"        "lord"        "never"       "revolution" 
## [171] "anyone"      "hate"        "another"     "doug"        "bad"        
## [176] "told"        "hammer"      "talk"        "valkyrie"    "hulk"       
## [181] "banner"      "getting"     "low"         "suns"        "trying"     
## [186] "team"        "smash"       "angry"       "avenger"     "denied"     
## [191] "tony"        "probably"    "anus"        "ship"
# Terms associated with each character name, using a lower correlation
# limit of 0.2. A single vectorised call returns a named list with one
# entry per queried term, printed in the order the terms are given.
findAssocs(dtm, terms = c("loki", "thor", "hulk"), corlimit = 0.2)
## $loki
##      wounds commemorate     tragedy        lift       lokis      growth 
##        0.21        0.21        0.21        0.21        0.21        0.21
## $thor
## son 
## 0.4
## $hulk
##   fire always 
##   0.22   0.21
# Bar chart of the ten most frequent terms, taken as the first ten rows of `d`.
# head() returns at most 10 rows, so a table with fewer than ten terms is
# plotted as-is instead of being padded with NA rows (which is what
# d[1:10, ] produces); the subset is also computed only once.
top_terms <- head(d, 10)
barplot(top_terms$freq, las = 2, names.arg = top_terms$word,
        col = "lightblue", main = "Most frequent words",
        ylab = "Word frequencies")