# Set the LANG environment variable to "en" for this session (presumably so
# that R reports messages and warnings in English -- confirm on your platform).
Sys.setenv(LANG = "en")
# tm: corpus handling and text clean-up (Corpus, tm_map, TermDocumentMatrix,
# findFreqTerms, findAssocs).
library("tm")
## Loading required package: NLP
# SnowballC: not called directly in the visible part of this script.
library("SnowballC")
# wordcloud: provides wordcloud().
library("wordcloud")
## Loading required package: RColorBrewer
# RColorBrewer: provides brewer.pal(), used for the word-cloud palette. The
# message recorded above shows wordcloud already attaches it.
library("RColorBrewer")
# Read the script text; readLines() returns one character element per line.
text <- readLines(
  "~/Documents/Thor.Ragnarok.txt",
  warn = FALSE,
  encoding = "UTF-8"
)

# Build a tm corpus in which every element of `text` is its own document.
docs <- Corpus(VectorSource(text))

# Clean-up pipeline. The steps run in this exact order.
# Recorded run: each tm_map() call below emitted the warning
#   "tm_map.SimpleCorpus(docs, ...): transformation drops documents".

# 1. Convert to lower case.
docs <- tm_map(docs, content_transformer(tolower))

# 2. Remove numbers.
docs <- tm_map(docs, removeNumbers)

# 3. Remove English stop words.
docs <- tm_map(docs, removeWords, stopwords("english"))

# 4. Remove punctuation marks.
docs <- tm_map(docs, removePunctuation)

# 5. Strip extra whitespace.
docs <- tm_map(docs, stripWhitespace)
# Term-document matrix of the cleaned corpus.
dtm <- TermDocumentMatrix(docs)

# Dense copy of the matrix, used to total each term's count over all documents.
m <- as.matrix(dtm)

# Per-term totals, most frequent first (names are the terms).
v <- sort(rowSums(m), decreasing = TRUE)

# Frequency table: one row per term, with the term and its total count.
d <- data.frame(word = names(v), freq = v)

# Show the 100 most frequent terms.
head(d, 100)
## word freq
## like like 56
## just just 52
## know know 49
## asgard asgard 45
## one one 44
## get get 39
## back back 35
## need need 35
## see see 33
## hulk hulk 31
## now now 30
## come come 29
## right right 29
## yeah yeah 28
## can can 28
## help help 28
## hey hey 26
## thor thor 25
## time time 24
## okay okay 23
## gonna gonna 22
## people people 22
## really really 21
## going going 21
## loki loki 20
## good good 20
## banner banner 20
## big big 19
## stop stop 19
## place place 19
## yes yes 19
## thunder thunder 19
## sorry sorry 18
## odin odin 17
## will will 17
## well well 16
## think think 16
## stay stay 16
## brother brother 16
## look look 15
## say say 15
## father father 14
## great great 14
## tell tell 14
## even even 14
## away away 14
## thing thing 14
## god god 14
## grandmaster grandmaster 14
## never never 14
## thought thought 13
## take take 13
## wanna wanna 13
## friend friend 13
## earth earth 12
## home home 12
## throne throne 12
## two two 12
## hammer hammer 12
## alive alive 11
## ragnarok ragnarok 11
## wait wait 11
## listen listen 11
## thank thank 11
## got got 11
## always always 11
## man man 11
## please please 11
## suns suns 11
## planet planet 10
## death death 10
## still still 10
## let let 10
## fight fight 10
## heimdall heimdall 10
## done done 10
## best best 10
## hela hela 10
## love love 10
## sakaar sakaar 10
## bad bad 10
## ship ship 10
## die die 9
## feel feel 9
## crown crown 9
## asgards asgards 9
## sword sword 9
## power power 9
## looks looks 9
## access access 9
## together together 9
## many many 9
## nothing nothing 9
## fine fine 9
## something something 9
## champion champion 9
## valkyrie valkyrie 9
## getting getting 9
## way way 8
## fire fire 8
# Fix the RNG seed so the random word placement is the same on every run.
set.seed(1234)

# Word cloud of every term that occurs at least 5 times (no cap on the number
# of words), coloured with the 8-colour "Dark2" Brewer palette.
# `TRUE` is spelled out instead of `T`, because `T` is an ordinary variable
# that can be reassigned.
# NOTE(review): the recorded run warns that several high-frequency words
# ("asgard", "need", "time", ...) could not be fit on the page and were not
# plotted. If that matters, consider a smaller `scale` or
# `random.order = FALSE` -- to be confirmed against the wordcloud docs.
wordcloud(
  words = d$word, freq = d$freq, min.freq = 5,
  max.words = Inf, random.order = TRUE, rot.per = 0.5,
  colors = brewer.pal(8, "Dark2")
)
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : asgard could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : getting could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : need could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : time could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : will could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : done could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : good could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : father could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : stupid could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : sorry could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : yeah could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : smash could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : grandmaster could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : friends could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = d$word, freq = d$freq, min.freq = 5, max.words =
## Inf, : home could not be fit on page. It will not be plotted.

# List the terms in `dtm` that meet the lower frequency bound of 5
# (lowfreq = 5); the result is auto-printed.
findFreqTerms(dtm, lowfreq = 5)
## [1] "know" "now" "thinking" "get" "well"
## [6] "answer" "just" "long" "earth" "see"
## [11] "time" "planet" "things" "death" "way"
## [16] "much" "think" "odin" "son" "thor"
## [21] "surtur" "alive" "still" "ago" "father"
## [26] "thought" "years" "die" "home" "asgard"
## [31] "ragnarok" "great" "around" "back" "feel"
## [36] "like" "really" "okay" "tell" "come"
## [41] "crown" "eternal" "flame" "shall" "will"
## [46] "asgards" "sword" "give" "even" "sorry"
## [51] "let" "going" "put" "big" "away"
## [56] "throne" "power" "stop" "thing" "already"
## [61] "honest" "fire" "quite" "gonna" "looks"
## [66] "fight" "right" "wait" "made" "make"
## [71] "everything" "work" "heimdall" "job" "access"
## [76] "bifrost" "nine" "realms" "mean" "mine"
## [81] "stuff" "called" "place" "together" "stay"
## [86] "look" "one" "yeah" "can" "guy"
## [91] "sure" "hell" "brother" "take" "listen"
## [96] "help" "done" "loki" "life" "brought"
## [101] "found" "many" "day" "little" "people"
## [106] "better" "though" "maybe" "turn" "thank"
## [111] "every" "best" "yes" "course" "nothing"
## [116] "face" "gone" "fine" "without" "kill"
## [121] "hey" "god" "thunder" "keep" "may"
## [126] "bring" "call" "said" "need" "hair"
## [131] "something" "forget" "good" "kind" "end"
## [136] "coming" "talking" "goddess" "hela" "sister"
## [141] "whatever" "must" "love" "want" "got"
## [146] "odins" "dead" "welcome" "last" "dying"
## [151] "palace" "sakaar" "lost" "grandmaster" "first"
## [156] "always" "say" "champion" "man" "wanna"
## [161] "please" "happened" "friend" "friends" "two"
## [166] "stupid" "real" "lord" "never" "revolution"
## [171] "anyone" "hate" "another" "doug" "bad"
## [176] "told" "hammer" "talk" "valkyrie" "hulk"
## [181] "banner" "getting" "low" "suns" "trying"
## [186] "team" "smash" "angry" "avenger" "denied"
## [191] "tony" "probably" "anus" "ship"
# For each character name, list the terms in `dtm` whose correlation with that
# name reaches the lower limit of 0.2 (corlimit = 0.2). The `##` lines after
# each call are the output recorded from a previous run.
findAssocs(dtm, terms = "loki", corlimit = 0.2)
## $loki
## wounds commemorate tragedy lift lokis growth
## 0.21 0.21 0.21 0.21 0.21 0.21
findAssocs(dtm, terms = "thor", corlimit = 0.2)
## $thor
## son
## 0.4
findAssocs(dtm, terms = "hulk", corlimit = 0.2)
## $hulk
## fire always
## 0.22 0.21
# Bar chart of the (up to) ten most frequent words, taken from the first rows
# of the frequency table `d`.
# head() replaces d[1:10, ]: indexing past the last row pads the result with
# NA rows when the table holds fewer than ten terms, whereas head() simply
# returns the rows that exist. With ten or more rows the result is identical.
top_words <- head(d, 10)
barplot(top_words$freq,
  las = 2, # draw axis labels perpendicular to the axis
  names.arg = top_words$word,
  col = "lightblue", main = "Most frequent words",
  ylab = "Word frequencies"
)
