Librerías
library(readr)
library(dplyr)
library(stringr)
library(lubridate)
library(ggplot2)
library(tm)
library(SnowballC)
library(wordcloud)
library(RColorBrewer)
library(udpipe)
library(lattice)
library(tidyr)
Dataset
# Read the Kindle review CSV from its local path and show a compact overview
# of the resulting columns and their types.
kindle <- readr::read_csv(
  file = "/Users/mariirobles/Desktop/all_kindle_review .csv"
)
dplyr::glimpse(kindle)
Rows: 12,000
Columns: 11
$ ...1 <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, …
$ `Unnamed: 0` <dbl> 11539, 5957, 9146, 7038, 1776, 3744, 13641, 4448, 2797, 5294, 47234, 9…
$ asin <chr> "B0033UV8HI", "B002HJV4DE", "B002ZG96I4", "B002QHWOEU", "B001A06VJ8", …
$ helpful <chr> "[8, 10]", "[1, 1]", "[0, 0]", "[1, 3]", "[0, 1]", "[6, 6]", "[1, 1]",…
$ rating <dbl> 3, 5, 3, 3, 4, 5, 2, 4, 5, 4, 1, 4, 1, 4, 5, 2, 4, 1, 5, 4, 4, 5, 5, 4…
$ reviewText <chr> "Jace Rankin may be short, but he's nothing to mess with, as the man w…
$ reviewTime <chr> "09 2, 2010", "10 8, 2013", "04 11, 2014", "07 5, 2014", "12 31, 2012"…
$ reviewerID <chr> "A3HHXRELK8BHQG", "A2RGNZ0TRF578I", "A3S0H2HV6U1I7F", "AC4OQW3GZ919J",…
$ reviewerName <chr> "Ridley", "Holly Butler", "Merissa", "Cleargrace", "Rjostler", "Aubrie…
$ summary <chr> "Entertaining But Average", "Terrific menage scenes!", "Snapdragon All…
$ unixReviewTime <dbl> 1283385600, 1381190400, 1397174400, 1404518400, 1356912000, 1260144000…
# Per-column summary of the freshly loaded data.
kindle %>% summary()
...1 Unnamed: 0 asin helpful rating
Min. : 0 Min. : 0 Length:12000 Length:12000 Min. :1.00
1st Qu.: 3000 1st Qu.: 3000 Class :character Class :character 1st Qu.:2.00
Median : 6000 Median : 6000 Mode :character Mode :character Median :3.50
Mean : 6000 Mean :10024 Mean :3.25
3rd Qu.: 8999 3rd Qu.:12476 3rd Qu.:4.25
Max. :11999 Max. :47770 Max. :5.00
reviewText reviewTime reviewerID reviewerName
Length:12000 Length:12000 Length:12000 Length:12000
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
summary unixReviewTime
Length:12000 Min. :9.602e+08
Class :character 1st Qu.:1.316e+09
Mode :character Median :1.357e+09
Mean :1.345e+09
3rd Qu.:1.377e+09
Max. :1.406e+09
Limpieza inicial
# Derive the analysis columns from the raw review fields:
#   review_date   - `reviewTime` parsed as a month-day-year date
#   helpful       - the bracketed vote string (e.g. "[8, 10]") without brackets
#   helpful_yes   - first comma-separated piece of `helpful`, as a number
#   helpful_total - second comma-separated piece of `helpful`, as a number
#   text_length   - number of whitespace-delimited tokens in `reviewText`
kindle <- kindle %>%
  mutate(review_date = mdy(reviewTime)) %>%
  mutate(helpful = str_remove_all(helpful, "\\[|\\]")) %>%
  mutate(
    helpful_yes = as.numeric(str_split_fixed(helpful, ",", 2)[, 1]),
    helpful_total = as.numeric(str_split_fixed(helpful, ",", 2)[, 2])
  ) %>%
  mutate(text_length = str_count(reviewText, "\\S+"))
# Range and quartiles of the parsed review dates.
kindle %>%
  pull(review_date) %>%
  summary()
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2000-06-06" "2011-09-17" "2012-12-30" "2012-08-09" "2013-08-19" "2014-07-20"
# Distribution summary of the star ratings.
kindle %>%
  pull(rating) %>%
  summary()
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 2.00 3.50 3.25 4.25 5.00
# Distribution summary of the second number in the helpfulness vote pair.
kindle %>%
  pull(helpful_total) %>%
  summary()
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.000 0.000 1.000 2.844 2.000 355.000
# Distribution summary of review length, measured in whitespace-delimited tokens.
kindle %>%
  pull(text_length) %>%
  summary()
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.0 32.0 62.0 109.2 135.0 2156.0
Preparación
# Compute the derived columns used by the rest of the analysis (parsed date,
# numeric helpfulness votes, word count).
# NOTE(review): the initial cleaning step earlier in the script performs the
# same derivation; re-running it here yields the same values because the
# bracket removal leaves an already-cleaned `helpful` string unchanged.
kindle <- mutate(
  kindle,
  review_date = mdy(reviewTime),
  helpful = str_remove_all(helpful, pattern = "\\[|\\]"),
  helpful_yes = as.numeric(
    str_split_fixed(helpful, pattern = ",", n = 2)[, 1]
  ),
  helpful_total = as.numeric(
    str_split_fixed(helpful, pattern = ",", n = 2)[, 2]
  ),
  text_length = str_count(reviewText, pattern = "\\S+")
)
EDA
# Rating distribution: one bar per star rating (treated as a discrete factor),
# bar height = number of reviews.
kindle %>%
  ggplot(mapping = aes(x = factor(rating))) +
  geom_bar(fill = "steelblue") +
  labs(
    title = "Distribución de calificaciones",
    x = "Rating",
    y = "Frecuencia"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"))
# Reviews per month: truncate each review date to the first day of its month,
# count the reviews in each month and draw the series as a line with points.
kindle %>%
  mutate(month = floor_date(review_date, "month")) %>%
  count(month) %>%
  ggplot(aes(x = month, y = n)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Número de reseñas por mes",
    x = "Mes",
    y = "Cantidad de reseñas"
  ) +
  theme_minimal() +
  # No trailing comma after the last element: an empty argument is only
  # tolerated by some ggplot2/rlang versions.
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5)
  )
# Histogram of review length in words (40 bins); the x axis view is limited to
# the 0-1000 range via coord_cartesian().
kindle %>%
  ggplot(mapping = aes(x = text_length)) +
  geom_histogram(bins = 40, fill = "#2E7D32", color = "white") +
  coord_cartesian(xlim = c(0, 1000)) +
  labs(
    title = "Distribución de la longitud de reseñas",
    x = "Número de palabras",
    y = "Frecuencia"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )
Filtrar por periodo de tiempo
# Analysis subset: reviews with non-missing, non-empty text, dated within the
# six months from 2013-01-01 to 2013-06-30 (inclusive), rated 4 stars or more.
kindle_filtrado <- kindle %>%
  filter(!is.na(reviewText), reviewText != "") %>%
  filter(
    review_date >= as.Date("2013-01-01"),
    review_date <= as.Date("2013-06-30")
  ) %>%
  filter(rating >= 4)
# Number of reviews that survived the filters.
kindle_filtrado %>% nrow()
[1] 1275
# Confirm the filtered dates stay inside the selected period.
kindle_filtrado %>%
  pull(review_date) %>%
  summary()
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2013-01-01" "2013-02-11" "2013-03-20" "2013-03-23" "2013-05-04" "2013-06-30"
# Confirm only ratings of 4 and above remain.
kindle_filtrado %>%
  pull(rating) %>%
  summary()
Min. 1st Qu. Median Mean 3rd Qu. Max.
4.000 4.000 4.000 4.496 5.000 5.000
Análisis de texto
# Corpus with one document per filtered review text; `docsOrig` keeps an
# untouched copy so later cleaning variants can restart from the raw text.
docs <- kindle_filtrado$reviewText %>%
  VectorSource() %>%
  Corpus()
docsOrig <- docs
Limpieza de texto
# Basic normalisation that still keeps stop words: lower-case the text, drop
# digits, drop punctuation, then collapse repeated whitespace.
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)
Frecuencia de palabras
# Term frequencies for the corpus that still contains stop words: build the
# term-document matrix, total each term across documents, rank descending,
# then list and plot the 20 most frequent terms.
dtm_raw <- TermDocumentMatrix(docs)
m_raw <- as.matrix(dtm_raw)
v_raw <- m_raw %>%
  rowSums() %>%
  sort(decreasing = TRUE)
d_raw <- data.frame(word = names(v_raw), freq = v_raw)
d_raw %>% head(20)
barplot(
  d_raw$freq[1:20],
  names.arg = d_raw$word[1:20],
  las = 2,
  col = "lightblue",
  main = "Palabras más frecuentes con stopwords",
  ylab = "Frecuencia"
)
Omitir stopwords frecuentes
# Rebuild the cleaned corpus from the untouched copy, this time removing stop
# words: the standard English list plus domain words that carry no signal in
# Kindle book reviews.
custom_stopwords <- c("book", "kindle", "read", "one", "will", "can", "just")
docs <- docsOrig
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- tm_map(docs, removeWords, custom_stopwords)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, stemDocument)
# Inflected forms ("books", "reading", ...) are not matched by the whole-word
# removal above and are then stemmed back to "book", "read", ...; remove the
# stems as well so the custom stop words do not reappear among the top terms.
# This also drops other words sharing those stems (e.g. "willing" -> "will").
docs <- tm_map(
  docs,
  removeWords,
  unique(wordStem(custom_stopwords, language = "english"))
)
docs <- tm_map(docs, stripWhitespace)
Frecuencia de palabras sin stopwords
# Term frequencies after stop-word removal and stemming: build the
# term-document matrix, total each stem across documents, rank descending,
# then list and plot the 20 most frequent stems.
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)
v <- m %>%
  rowSums() %>%
  sort(decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
d %>% head(20)
barplot(
  d$freq[1:20],
  names.arg = d$word[1:20],
  las = 2,
  col = "cadetblue",
  main = "Palabras más frecuentes sin stopwords",
  ylab = "Frecuencia"
)
Asociaciones relevantes
# The ten most frequent stems (`d` is already sorted by descending frequency).
top10_words <- d[["word"]][seq_len(10)]
print(top10_words)
[1] "stori" "love" "like" "good" "charact" "enjoy" "seri" "book" "great"
[10] "realli"
# Terms whose correlation with the most frequent stem is at least 0.15.
findAssocs(x = dtm, terms = top10_words[[1]], corlimit = 0.15)
$stori
agreement bloodi feather flash
0.38 0.38 0.38 0.38
gritti hardboil havent jacquelin
0.38 0.38 0.38 0.38
lessright meati phinea ruffl
0.38 0.38 0.38 0.38
sock sour stuffr symbio
0.38 0.38 0.38 0.38
troutt major familiar brief
0.38 0.34 0.32 0.31
bat standalon whiskey clever
0.31 0.31 0.31 0.30
origin isnt disturb proof
0.28 0.28 0.28 0.27
angl equal led unpredict
0.26 0.26 0.26 0.26
ebook point also board
0.25 0.25 0.24 0.24
chill supernatur horror baddi
0.24 0.24 0.24 0.24
charact kind tale section
0.23 0.23 0.23 0.23
countrysideprob dick fixat fox
0.23 0.23 0.23 0.23
goldilock han ilk impli
0.23 0.23 0.23 0.23
industri intrud mice puss
0.23 0.23 0.23 0.23
recit resolut roam slang
0.23 0.23 0.23 0.23
whittington king collect massiv
0.23 0.22 0.22 0.22
dish sell singl compel
0.22 0.22 0.22 0.21
set stand safe remind
0.21 0.21 0.21 0.21
sold silli short like
0.21 0.21 0.20 0.20
jack reader english ghost
0.20 0.20 0.20 0.20
say mix strand dont
0.20 0.20 0.20 0.20
survivor includ intro differ
0.20 0.19 0.19 0.19
fairi sometim bring name
0.19 0.19 0.19 0.19
roll oldfashion crime career
0.19 0.19 0.19 0.19
drop gruesom confront get
0.19 0.19 0.19 0.18
first entir mate throughout
0.18 0.18 0.18 0.18
sensual bang bear antagonistadversari
0.18 0.18 0.18 0.18
enjoyablegiven jori per scope
0.18 0.18 0.18 0.18
realli concept way anoth
0.17 0.17 0.17 0.17
turn mani term heroin
0.17 0.17 0.17 0.17
justic introduct fbi told
0.17 0.17 0.17 0.17
wit doesnt threat boot
0.17 0.17 0.17 0.17
whimsic enjoy littl good
0.17 0.16 0.16 0.16
sex well see favorit
0.16 0.16 0.16 0.16
next time fair stephen
0.16 0.16 0.16 0.16
attitud giant wasnt instanc
0.16 0.16 0.16 0.16
hit reign joke colonel
0.16 0.16 0.16 0.16
indiana mete murphi naturalborn
0.16 0.16 0.16 0.16
novic overload persona prefac
0.16 0.16 0.16 0.16
ritual segment stargat straightdiffer
0.16 0.16 0.16 0.16
someth action buy now
0.15 0.15 0.15 0.15
thus perhap dragon disagre
0.15 0.15 0.15 0.15
separ vari pot cement
0.15 0.15 0.15 0.15
castl
0.15
# Terms whose correlation with the second most frequent stem is at least 0.15.
findAssocs(x = dtm, terms = top10_words[[2]], corlimit = 0.15)
$love
akin arcana dictatorshipr distort duo dylan
0.27 0.27 0.27 0.27 0.27 0.27
forefront fring moira playev prejudic roger
0.27 0.27 0.27 0.27 0.27 0.27
rogerss weaker witch werewolf rock sasha
0.27 0.27 0.27 0.25 0.25 0.25
seriesorigin want alpha tradit devour amongst
0.24 0.22 0.21 0.21 0.20 0.20
awaken closur eeri reccomend juliet romeo
0.20 0.20 0.20 0.20 0.20 0.20
stake pack imposs crew jayn rylon
0.20 0.19 0.19 0.19 0.19 0.19
asian automat even charact seri post
0.19 0.19 0.18 0.18 0.18 0.18
fell emot werewolv crush mmmm sanctuari
0.18 0.18 0.18 0.18 0.18 0.18
pnr cement dynam mentor kind zsadist
0.18 0.18 0.18 0.18 0.17 0.17
percept alanso blog cameo cobra dodg
0.17 0.17 0.17 0.17 0.17 0.17
eli fantasymik hammer ithug maxwel mmmf
0.17 0.17 0.17 0.17 0.17 0.17
muse nope powertool rod scorch squirm
0.17 0.17 0.17 0.17 0.17 0.17
kelli defend braid haven jae oooh
0.17 0.17 0.17 0.17 0.17 0.17
trunk wrist know get male colter
0.17 0.17 0.16 0.16 0.16 0.16
kgi now pain accept sinc sunset
0.16 0.16 0.16 0.16 0.16 0.16
wise among wick bookish larissa larissaslif
0.16 0.16 0.16 0.16 0.16 0.16
away need summer cruel
0.15 0.15 0.15 0.15
# Terms whose correlation with the third most frequent stem is at least 0.15.
findAssocs(x = dtm, terms = top10_words[[3]], corlimit = 0.15)
$like
realli befallen curs delilah didnt
0.33 0.32 0.32 0.32 0.32
geniuskickbutt invis letdown mediocr previouslyswoon
0.32 0.32 0.32 0.32 0.32
showalterfifth starszan treacher umph yaycharact
0.32 0.32 0.32 0.32 0.32
nola zane scale amazon wasnt
0.31 0.30 0.30 0.29 0.29
vampir atlanti bride bug layel
0.28 0.28 0.28 0.27 0.27
thought novella villain king enslav
0.26 0.26 0.26 0.25 0.25
lust even warrior absenc acceler
0.25 0.24 0.24 0.24 0.24
bath catlik chast halfman hermit
0.24 0.24 0.24 0.24 0.24
humanlik iowa kansa kingnaughti pond
0.24 0.24 0.24 0.24 0.24
recreat skin stateanyway tornado charact
0.24 0.24 0.24 0.24 0.23
pretti game thousand time scene
0.23 0.23 0.23 0.22 0.21
decid sudden realiz eread fellow
0.21 0.21 0.21 0.21 0.21
iron whether consequ independ consort
0.21 0.21 0.21 0.21 0.21
stori also man might littl
0.20 0.20 0.20 0.20 0.19
think away mayb seem person
0.19 0.19 0.19 0.19 0.19
strang rip heart worthi lose
0.19 0.19 0.19 0.19 0.19
mix behavior palac get much
0.19 0.19 0.19 0.18 0.18
nice feel attitud satisfi court
0.18 0.18 0.18 0.18 0.18
hint sex first bit someth
0.18 0.17 0.17 0.17 0.17
romanc kind experi reader name
0.17 0.17 0.17 0.17 0.17
fierc strong sound enemi bird
0.17 0.17 0.17 0.17 0.17
swept revel hugh niko perspectiveoveral
0.17 0.17 0.17 0.17 0.17
spartan twyla know see stop
0.17 0.17 0.16 0.16 0.16
soul main lot guy meet
0.16 0.16 0.16 0.16 0.16
hate mean now throw heroin
0.16 0.16 0.16 0.16 0.16
progress ruin wealthi bare color
0.16 0.16 0.16 0.16 0.16
comparison halfbeast notic revisit wizard
0.16 0.16 0.16 0.16 0.16
japanes better find opinion small
0.16 0.15 0.15 0.15 0.15
risk enter
0.15 0.15
# Terms whose correlation with the fourth most frequent stem is at least 0.15.
findAssocs(x = dtm, terms = top10_words[[4]], corlimit = 0.15)
$good
megan decis unpredict stori blue domin justic agreement
0.17 0.17 0.17 0.16 0.16 0.16 0.16 0.16
bloodi feather flash gritti hardboil havent jacquelin lessright
0.16 0.16 0.16 0.16 0.16 0.16 0.16 0.16
meati phinea ruffl sock sour stuffr symbio troutt
0.16 0.16 0.16 0.16 0.16 0.16 0.16 0.16
advantag beforehand blindfold charat derek dull endulg restrain
0.16 0.16 0.16 0.16 0.16 0.16 0.16 0.16
soley sweep phantom chill employ
0.16 0.16 0.16 0.15 0.15
# Terms whose correlation with the fifth most frequent stem is at least 0.15.
findAssocs(x = dtm, terms = top10_words[[5]], corlimit = 0.15)
$charact
main develop supernatur reader
0.46 0.35 0.30 0.28
sure plot baddi novel
0.26 0.25 0.25 0.24
dialogu demon like stori
0.24 0.24 0.23 0.23
distinct asian action throughout
0.23 0.23 0.22 0.22
motiv littl well first
0.22 0.21 0.21 0.21
everybodi novella progress villain
0.21 0.21 0.21 0.21
enough time identifi scale
0.20 0.20 0.20 0.20
most lament mirandia norrel
0.20 0.20 0.20 0.20
oblivi offici semibesti twistdespit
0.20 0.20 0.20 0.20
artwork critiqu deem foil
0.20 0.20 0.20 0.20
lhung naiveti novelist satisfactori
0.20 0.20 0.20 0.20
sole starsread starsth taus
0.20 0.20 0.20 0.20
violet xxx colonel indiana
0.20 0.20 0.20 0.20
mete murphi naturalborn novic
0.20 0.20 0.20 0.20
overload persona prefac ritual
0.20 0.20 0.20 0.20
segment stargat straightdiffer carson
0.20 0.20 0.20 0.20
centr seri gorgeous much
0.20 0.19 0.19 0.19
mention review uncertainti place
0.19 0.19 0.19 0.19
mix policeman japanes specialti
0.19 0.19 0.19 0.19
switch spurt castl author
0.19 0.19 0.19 0.18
think bit love find
0.18 0.18 0.18 0.18
real abil paranorm depth
0.18 0.18 0.18 0.18
definit enslav smart strong
0.18 0.18 0.18 0.18
lack asid get provid
0.18 0.18 0.17 0.17
look descript way add
0.17 0.17 0.17 0.17
follow kind set accept
0.17 0.17 0.17 0.17
heat frustrat kate color
0.17 0.17 0.17 0.17
center inner judg holli
0.17 0.17 0.17 0.17
know realli make chang
0.16 0.16 0.16 0.16
expect ahem also despic
0.16 0.16 0.16 0.16
enraptur exagger jodi mara
0.16 0.16 0.16 0.16
redford star person believ
0.16 0.16 0.16 0.16
opinion reach paragraph sweet
0.16 0.16 0.16 0.16
after attitud controversi counsel
0.16 0.16 0.16 0.16
cree jenni macho trauma
0.16 0.16 0.16 0.16
unquest unrealist verbal whack
0.16 0.16 0.16 0.16
passion thus appear vampir
0.16 0.16 0.16 0.16
battl befallen curs delilah
0.16 0.16 0.16 0.16
didnt fight geniuskickbutt invis
0.16 0.16 0.16 0.16
iron letdown likabl mediocr
0.16 0.16 0.16 0.16
previouslyswoon showalterfifth starszan treacher
0.16 0.16 0.16 0.16
umph yaycharact mechan graphic
0.16 0.16 0.16 0.16
taken endear drawn prais
0.16 0.16 0.16 0.16
sooner crew mmmm notic
0.16 0.16 0.16 0.16
pot antagonist dement extend
0.16 0.16 0.16 0.16
meaning tch antagonistadversari enjoyablegiven
0.16 0.16 0.16 0.16
jori per scope hair
0.16 0.16 0.16 0.16
braid haven jae oooh
0.16 0.16 0.16 0.16
trunk wrist made tension
0.16 0.16 0.15 0.15
mani happili quest nola
0.15 0.15 0.15 0.15
grand expand
0.15 0.15
# Terms whose correlation with the sixth most frequent stem is at least 0.15.
findAssocs(x = dtm, terms = top10_words[[6]], corlimit = 0.15)
$enjoy
antagonistadversari enjoyablegiven jori per
0.23 0.23 0.23 0.23
scope thorough thus mate
0.23 0.22 0.22 0.21
threat baddi dement extend
0.20 0.19 0.18 0.18
meaning tch far bond
0.18 0.18 0.17 0.17
read supernatur alreadi stori
0.17 0.17 0.16 0.16
antagonist realli blind oldi
0.16 0.15 0.15 0.15
simpli pot central
0.15 0.15 0.15
# Terms whose correlation with the seventh most frequent stem is at least 0.15.
findAssocs(x = dtm, terms = top10_words[[7]], corlimit = 0.15)
$seri
mate dragon thus antagonistadversari
0.31 0.29 0.28 0.27
enjoyablegiven jori per scope
0.27 0.27 0.27 0.27
paranorm supernatur threat stand
0.24 0.24 0.24 0.23
knight alreadi first lost
0.23 0.22 0.22 0.22
entir alpha book lord
0.22 0.22 0.21 0.21
standalon underworld baddi crack
0.21 0.21 0.21 0.20
addit charact next start
0.20 0.19 0.19 0.19
review rest psycop consid
0.19 0.19 0.19 0.19
introduc oldi fleet pot
0.19 0.19 0.19 0.19
injuri dauntless displac fearless
0.19 0.19 0.19 0.19
frontier geari genius invinc
0.19 0.19 0.19 0.19
ofth oppon relentless relentlessandth
0.19 0.19 0.19 0.19
syndic tactic tangibl valiant
0.19 0.19 0.19 0.19
far bond love protect
0.18 0.18 0.18 0.18
track seek order left
0.18 0.18 0.17 0.17
dish healer allianc usual
0.17 0.17 0.17 0.16
continu claim victori megan
0.16 0.16 0.16 0.16
progress somewhat blood serv
0.16 0.16 0.16 0.16
amus frustrat dragonrid captain
0.16 0.16 0.16 0.16
hob actual warn prequel
0.16 0.15 0.15 0.15
second part betterp gushier
0.15 0.15 0.15 0.15
meatier plow plus risk
0.15 0.15 0.15 0.15
delv geryon kadenc lucif
0.15 0.15 0.15 0.15
oppress pandora storyprequel rang
0.15 0.15 0.15 0.15
feelingsof helio hurtand intergr
0.15 0.15 0.15 0.15
involvedwith jinn kaden lucia
0.15 0.15 0.15 0.15
matelucia reynor
0.15 0.15
# Terms whose correlation with the eighth most frequent stem is at least 0.15.
findAssocs(x = dtm, terms = top10_words[[8]], corlimit = 0.15)
$book
bundl hoot cian seri blood irishman illustr review elizabeth
0.26 0.22 0.22 0.21 0.18 0.18 0.17 0.16 0.16
order three
0.16 0.15
# Terms whose correlation with the ninth most frequent stem is at least 0.15.
findAssocs(x = dtm, terms = top10_words[[9]], corlimit = 0.15)
$great
kendl lamong underground chanceshan fiend hash
0.16 0.16 0.16 0.16 0.16 0.16
maneuv miscalcul orbit oxley paig rey
0.16 0.16 0.16 0.16 0.16 0.16
scenessometim shameless shane toiletnow twocharact premi
0.16 0.16 0.16 0.16 0.16 0.16
# Terms whose correlation with the tenth most frequent stem is at least 0.15.
findAssocs(x = dtm, terms = top10_words[[10]], corlimit = 0.15)
$realli
wasnt befallen curs delilah didnt
0.47 0.46 0.46 0.46 0.46
geniuskickbutt invis letdown mediocr nola
0.46 0.46 0.46 0.46 0.46
previouslyswoon showalterfifth starszan treacher umph
0.46 0.46 0.46 0.46 0.46
yaycharact scale amazon warrior atlanti
0.46 0.45 0.44 0.43 0.43
zane bride iron layel villain
0.42 0.42 0.39 0.39 0.39
bug die enemi like rip
0.35 0.34 0.34 0.33 0.33
stop novella vampir eread fellow
0.32 0.32 0.32 0.32 0.32
enslav game heart worst broken
0.31 0.31 0.30 0.29 0.28
whether thought attitud fierc worthi
0.28 0.26 0.26 0.26 0.26
hate bother lose satisfi root
0.25 0.24 0.23 0.22 0.22
get scream lust presenc terribl
0.21 0.21 0.21 0.21 0.21
even know want made lot
0.20 0.20 0.20 0.20 0.20
also gena someth male pretti
0.20 0.20 0.19 0.19 0.19
cute break ruin romanc time
0.19 0.19 0.19 0.18 0.18
huge delus facil hallucin institut
0.18 0.18 0.18 0.18 0.18
pressur prom ration schizophren seriesoveral
0.18 0.18 0.18 0.18 0.18
seriesyearold therapist whilst yay immedi
0.18 0.18 0.18 0.18 0.18
stori mention cover dot embarrass
0.17 0.17 0.17 0.17 0.17
femal compar lack likabl bookish
0.17 0.17 0.17 0.17 0.17
larissa larissaslif honor charact lock
0.17 0.17 0.17 0.16 0.16
main much now realiz anyway
0.16 0.16 0.16 0.16 0.16
enjoy see got met amaz
0.15 0.15 0.15 0.15 0.15
opinion chest chines stephen militari
0.15 0.15 0.15 0.15 0.15
care flat point
0.15 0.15 0.15
Modelo del lenguaje
library(udpipe)
# Fetch the English UDPipe model. `overwrite = FALSE` reuses a copy that is
# already on disk instead of downloading ~15 MB again on every run.
modelo_ing <- udpipe_download_model(language = "english", overwrite = FALSE)
# Fail here with the downloader's own message rather than later, when the
# model file cannot be loaded.
if (isTRUE(modelo_ing$download_failed)) {
  stop(
    "UDPipe model download failed: ", modelo_ing$download_message,
    call. = FALSE
  )
}
trying URL 'https://raw.githubusercontent.com/jwijffels/udpipe.models.ud.2.5/master/inst/udpipe-ud-2.5-191206/english-ewt-ud-2.5-191206.udpipe'
Content type 'application/octet-stream' length 16309608 bytes (15.6 MB)
==================================================
downloaded 15.6 MB
# Load the downloaded model, annotate every filtered review with it, and
# flatten the annotation into a data frame (`x`) for the analyses below.
udmodel_english <- udpipe_load_model(file = modelo_ing$file_model)
s <- udpipe_annotate(
  object = udmodel_english,
  x = kindle_filtrado$reviewText
)
x <- data.frame(s)
Palabras frecuentes por clasificación: Sustantivos
# Most frequent tokens tagged NOUN: count token occurrences, fix the factor
# level order to the reverse of the frequency ranking for the chart's category
# axis, then list and chart the top 20.
stats <- txt_freq(x$token[x$upos %in% "NOUN"])
stats[["key"]] <- factor(x = stats[["key"]], levels = rev(stats[["key"]]))
head(stats, n = 20)
barchart(
  key ~ freq,
  data = head(stats, n = 20),
  main = "Sustantivos",
  xlab = "Frecuencia",
  col = "pink"
)
Adjetivos
# Most frequent tokens tagged ADJ: count token occurrences, fix the factor
# level order to the reverse of the frequency ranking for the chart's category
# axis, then list and chart the top 20.
stats <- txt_freq(x$token[x$upos %in% "ADJ"])
stats[["key"]] <- factor(x = stats[["key"]], levels = rev(stats[["key"]]))
head(stats, n = 20)
barchart(
  key ~ freq,
  data = head(stats, n = 20),
  main = "Adjetivos",
  xlab = "Frecuencia",
  col = "purple"
)
Verbos
# Most frequent tokens tagged VERB: count token occurrences, fix the factor
# level order to the reverse of the frequency ranking for the chart's category
# axis, then list and chart the top 20.
stats <- txt_freq(x$token[x$upos %in% "VERB"])
stats[["key"]] <- factor(x = stats[["key"]], levels = rev(stats[["key"]]))
head(stats, n = 20)
barchart(
  key ~ freq,
  data = head(stats, n = 20),
  main = "Verbos",
  xlab = "Frecuencia",
  col = "pink"
)
Algoritmo RAKE
# RAKE keyword extraction over lemmas, grouped per document, considering only
# nouns and adjectives as candidate words. The chart shows the first 20
# keywords that occur more than 3 times, with bar length = RAKE score.
is_candidate <- x$upos %in% c("NOUN", "ADJ")
stats <- keywords_rake(
  x = x,
  term = "lemma",
  group = "doc_id",
  relevant = is_candidate
)
stats[["key"]] <- factor(x = stats[["keyword"]], levels = rev(stats[["keyword"]]))
barchart(
  key ~ rake,
  data = head(stats[which(stats$freq > 3), ], n = 20),
  main = "Frases identificadas",
  xlab = "Rake",
  col = "violet"
)
Frases comúnmente utilizadas
# Simple noun phrases: recode the universal POS tags to the one-letter
# phrasemachine tags, match the tag sequence against a noun-phrase pattern,
# keep multi-word phrases seen more than 3 times and chart the first 20.
x[["phrase_tag"]] <- as_phrasemachine(x$upos, type = "upos")
stats <- keywords_phrases(
  x = x[["phrase_tag"]],
  term = tolower(x$token),
  pattern = "(A|N)*N(P+D*(A|N)*N)*",
  is_regex = TRUE,
  detailed = FALSE
)
stats <- stats[which(stats$ngram > 1 & stats$freq > 3), ]
stats[["key"]] <- factor(x = stats[["keyword"]], levels = rev(stats[["keyword"]]))
barchart(
  key ~ freq,
  data = head(stats, n = 20),
  main = "Keywords - simple noun phrases",
  xlab = "Frequency",
  col = "magenta"
)
Nube de palabras
# Word clouds of the stemmed term frequencies. The seed is fixed once so the
# layouts are reproducible; both clouds place the most frequent terms first.
set.seed(123)
# Cloud 1: stems with frequency >= 15, at most 80 words, 20% rotated.
wordcloud(
  words = d[["word"]],
  freq = d[["freq"]],
  scale = c(4, 0.8),
  min.freq = 15,
  max.words = 80,
  random.order = FALSE,
  rot.per = 0.2,
  colors = brewer.pal(n = 8, name = "Dark2")
)
# Cloud 2: stems with frequency >= 10, at most 100 words, 35% rotated.
wordcloud(
  words = d[["word"]],
  freq = d[["freq"]],
  min.freq = 10,
  max.words = 100,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(n = 8, name = "Set2")
)
# =========================
# ANALYSIS 2
# Period: January-June 2013
# Low rating: <= 2
# =========================
# Same period and text conditions as the first analysis, but keeping only
# reviews rated 2 stars or fewer.
kindle_filtrado2 <- kindle %>%
  filter(!is.na(reviewText), reviewText != "") %>%
  filter(
    review_date >= as.Date("2013-01-01"),
    review_date <= as.Date("2013-06-30")
  ) %>%
  filter(rating <= 2)
# Number of low-rated reviews in the period.
kindle_filtrado2 %>% nrow()
[1] 770
# Confirm the filtered dates stay inside the selected period.
kindle_filtrado2 %>%
  pull(review_date) %>%
  summary()
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2013-01-01" "2013-02-09" "2013-03-20" "2013-03-23" "2013-05-04" "2013-06-30"
# Confirm only ratings of 2 and below remain.
kindle_filtrado2 %>%
  pull(rating) %>%
  summary()
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 1.000 2.000 1.503 2.000 2.000
# Corpus: one document per low-rated review text; `docs2Orig` keeps an
# untouched copy of the raw corpus.
docs2 <- Corpus(VectorSource(kindle_filtrado2$reviewText))
docs2Orig <- docs2

# Cleaning with stop-word removal, starting from the raw corpus: lower-case,
# drop digits, remove the standard English stop words plus domain words that
# carry no signal in Kindle book reviews, drop punctuation, collapse
# whitespace and stem.
custom_stopwords <- c("book", "kindle", "read", "one", "will", "can", "just")
docs2 <- tm_map(docs2Orig, content_transformer(tolower))
docs2 <- tm_map(docs2, removeNumbers)
docs2 <- tm_map(docs2, removeWords, stopwords("english"))
docs2 <- tm_map(docs2, removeWords, custom_stopwords)
docs2 <- tm_map(docs2, removePunctuation)
docs2 <- tm_map(docs2, stripWhitespace)
docs2 <- tm_map(docs2, stemDocument)
# Inflected forms ("books", "reading", ...) are not matched by the whole-word
# removal above and are then stemmed back to "book", "read", ...; remove the
# stems as well so the custom stop words do not reappear among the top terms.
# This also drops other words sharing those stems (e.g. "willing" -> "will").
docs2 <- tm_map(
  docs2,
  removeWords,
  unique(wordStem(custom_stopwords, language = "english"))
)
docs2 <- tm_map(docs2, stripWhitespace)
Frecuencia de palabras sin stopwords
# Term frequencies for the low-rating corpus: build the term-document matrix,
# total each stem across documents, rank descending, then list and plot the
# 20 most frequent stems.
dtm2 <- TermDocumentMatrix(docs2)
m2 <- as.matrix(dtm2)
v2 <- m2 %>%
  rowSums() %>%
  sort(decreasing = TRUE)
d2 <- data.frame(word = names(v2), freq = v2)
d2 %>% head(20)
barplot(
  d2$freq[1:20],
  names.arg = d2$word[1:20],
  las = 2,
  col = "tomato",
  main = "Palabras más frecuentes sin stopwords (rating <= 2)",
  ylab = "Frecuencia"
)
# Relevant associations: start from the ten most frequent stems of the
# low-rating corpus (`d2` is already sorted by descending frequency).
top10_words2 <- d2[["word"]][seq_len(10)]
print(top10_words2)
[1] "stori" "like" "charact" "get" "good" "time" "realli" "author" "short"
[10] "read"
# Low-rating corpus: terms whose correlation with the most frequent stem is
# at least 0.15.
findAssocs(x = dtm2, terms = top10_words2[[1]], corlimit = 0.15)
$stori
short duo alibi align contrast dysfunct
0.36 0.35 0.34 0.34 0.34 0.34
fourth hill homicid hoosegow husbandwif lonesom
0.34 0.34 0.34 0.34 0.34 0.34
lunch mick mysteryreveng result specif stickpin
0.34 0.34 0.34 0.34 0.34 0.34
strangler trail wellpac wellwritten west collect
0.34 0.34 0.34 0.34 0.34 0.33
stark featur transit reader nonsens advent
0.32 0.31 0.29 0.28 0.28 0.28
buri calendar incent increas layout moot
0.28 0.28 0.28 0.28 0.28 0.28
poignant retold storybook unnecessarili end two
0.28 0.28 0.28 0.28 0.27 0.26
full closur invent unfinish make author
0.25 0.25 0.25 0.25 0.24 0.23
howev justifi complet detail old qualiti
0.23 0.23 0.22 0.22 0.22 0.22
classic requir antholog collector critic deadlin
0.22 0.22 0.22 0.22 0.22 0.22
dishearten farbut peek sadden worthi base
0.22 0.22 0.22 0.22 0.22 0.21
tale mess charact reread promis genr
0.21 0.21 0.20 0.20 0.20 0.20
issu multipl major preview amateurish problem
0.20 0.20 0.20 0.20 0.20 0.19
easili introduc satisfi charlott departur vulner
0.19 0.19 0.19 0.19 0.19 0.19
abound angl argument blair blatti bottl
0.19 0.19 0.19 0.19 0.19 0.19
cheryl corrupt disguis epilept exorc exorcist
0.19 0.19 0.19 0.19 0.19 0.19
faith godfath guilt implic inhabit jesuit
0.19 0.19 0.19 0.19 0.19 0.19
nonbeliev nuanc pea religion ripoff satan
0.19 0.19 0.19 0.19 0.19 0.19
sin skill soup stanc subtext tract
0.19 0.19 0.19 0.19 0.19 0.19
vanish vessel western womb hunt follow
0.19 0.19 0.19 0.19 0.19 0.18
page horror first final say skip
0.18 0.18 0.18 0.18 0.18 0.18
case elsewher term discuss flesh receiv
0.18 0.18 0.18 0.18 0.18 0.18
engag sneak still enough section upsid
0.18 0.18 0.17 0.17 0.17 0.17
point begin whole line although consist
0.17 0.17 0.17 0.17 0.17 0.17
close narrat somewhat fizzl third smell
0.17 0.17 0.17 0.17 0.17 0.17
passion music parent complex religi feel
0.17 0.17 0.17 0.17 0.17 0.16
longer anoth knew better william plan
0.16 0.16 0.16 0.16 0.16 0.16
pace young fit differ small photo
0.16 0.16 0.16 0.16 0.16 0.16
understood cop mediev deepli craigslist decad
0.16 0.16 0.16 0.16 0.16 0.16
digit enclosesnop fifti flame grandpar health
0.16 0.16 0.16 0.16 0.16 0.16
manuscript medic middleag ofth patterson poll
0.16 0.16 0.16 0.16 0.16 0.16
sic sophor uneven aris bearabl clandestin
0.16 0.16 0.16 0.16 0.16 0.16
clark climat deliv dive excia grizzl
0.16 0.16 0.16 0.16 0.16 0.16
ident killerfirst lawyer nose pair patricia
0.16 0.16 0.16 0.16 0.16 0.16
profil regul singer station undoubt utmost
0.16 0.16 0.16 0.16 0.16 0.16
washington took entir great set probabl
0.16 0.15 0.15 0.15 0.15 0.15
seem never abl drop reveng childhood
0.15 0.15 0.15 0.15 0.15 0.15
contain hear opposit discov retel
0.15 0.15 0.15 0.15 0.15
# Low-rating corpus: terms whose correlation with the second most frequent
# stem is at least 0.15.
findAssocs(x = dtm2, terms = top10_words2[[2]], corlimit = 0.15)
$like
rather behavior wing eat stalker
0.44 0.44 0.43 0.42 0.42
ala allud analog beard benefit
0.42 0.42 0.42 0.42 0.42
bent breath brule candi candyslick
0.42 0.42 0.42 0.42 0.42
chocol cinnamon cregrav crumb dehuman
0.42 0.42 0.42 0.42 0.42
dicken disgrac faster fist forg
0.42 0.42 0.42 0.42 0.42
glint golden hallefrickinglujah hammer harsher
0.42 0.42 0.42 0.42 0.42
het hood hover hunter lanc
0.42 0.42 0.42 0.42 0.42
lift merriamwebst metal miracl pennon
0.42 0.42 0.42 0.42 0.42
perspir pillow poni pound pump
0.42 0.42 0.42 0.42 0.42
sappi sappiest secur sedgwick sheen
0.42 0.42 0.42 0.42 0.42
shine sincer slick souffleacut stardustr
0.42 0.42 0.42 0.42 0.42
sticki streamer stress stroke sued
0.42 0.42 0.42 0.42 0.42
surg swallowtail sympath tentat transfix
0.42 0.42 0.42 0.42 0.42
triangular welom wet ground sex
0.42 0.42 0.42 0.41 0.40
love thing tri introduc angel
0.40 0.40 0.39 0.39 0.39
shall right sort summari someth
0.38 0.37 0.37 0.37 0.36
realli jame apolog felt work
0.36 0.36 0.36 0.35 0.35
part guy want get first
0.35 0.34 0.34 0.34 0.34
suppos actual child least black
0.34 0.34 0.34 0.34 0.34
hand man accept blunt whip
0.34 0.33 0.33 0.33 0.33
way see everyth said even
0.32 0.32 0.32 0.32 0.32
mean met casey happili natur
0.32 0.32 0.32 0.32 0.32
safe synopsi dohner afterward argernon
0.32 0.32 0.32 0.32 0.32
belt berrr chastiti colli counterpart
0.32 0.32 0.32 0.32 0.32
cyborg deed dynam gamechang necessarili
0.32 0.32 0.32 0.32 0.32
parad pointyear ral replac rever
0.32 0.32 0.32 0.32 0.32
unbound warrior whatev zorn much
0.32 0.32 0.32 0.32 0.31
knew think sound wonder say
0.31 0.31 0.31 0.31 0.31
head speci plastic bulli center
0.31 0.31 0.31 0.31 0.31
cloth cream includ pink sword
0.31 0.31 0.31 0.31 0.31
ignor mate tie make seem
0.31 0.31 0.31 0.30 0.30
affect yea bear excus year
0.30 0.30 0.30 0.30 0.30
share jake strike contradict final
0.30 0.30 0.30 0.30 0.29
scene els someon without almost
0.29 0.29 0.29 0.29 0.29
bodi meant serious cloud day
0.29 0.29 0.28 0.28 0.28
drop week meet hous claim
0.28 0.28 0.28 0.28 0.28
alon attach exhaust glove retail
0.28 0.28 0.28 0.28 0.28
rude straw tone room adjust
0.28 0.28 0.28 0.28 0.28
conclus cost deduc disregard dose
0.28 0.28 0.28 0.28 0.28
fate furious gape glenn goodguy
0.28 0.28 0.28 0.28 0.28
greet hanger incid injuri ley
0.28 0.28 0.28 0.28 0.28
leyland longtim lowerswel luc luca
0.28 0.28 0.28 0.28 0.28
mateleyland monogom newest pertin pine
0.28 0.28 0.28 0.28 0.28
recogn sake sensit shaft sleepi
0.28 0.28 0.28 0.28 0.28
slide triomega vol wolv alpha
0.28 0.28 0.28 0.28 0.28
silent noth tell new broke
0.28 0.27 0.27 0.27 0.27
sinc old due abil listen
0.27 0.27 0.27 0.27 0.27
heart minut figur time also
0.27 0.26 0.26 0.26 0.26
back girl favorit simpl exampl
0.26 0.26 0.26 0.26 0.26
flight hell land heard volum
0.26 0.26 0.26 0.26 0.26
cultur strength author end find
0.26 0.26 0.25 0.25 0.25
happen never let bed reason
0.25 0.25 0.25 0.25 0.25
idea pain white allow found
0.25 0.25 0.25 0.25 0.24
seri good other got updat
0.24 0.24 0.24 0.24 0.24
break care world relationship exact
0.24 0.24 0.24 0.24 0.24
male awhil orgasm catch unbear
0.24 0.24 0.24 0.24 0.24
charact feel take better need
0.23 0.23 0.23 0.23 0.23
bring made frustrat around earth
0.23 0.23 0.23 0.23 0.23
grew men sexi refer began
0.23 0.23 0.23 0.23 0.23
color suggest book sever review
0.23 0.23 0.22 0.22 0.22
respect immedi know involv sad
0.22 0.22 0.22 0.22 0.22
saw wish bedroom live must
0.22 0.22 0.22 0.22 0.22
job secret hear ahead twice
0.22 0.22 0.22 0.22 0.22
plight decid town wrap blogspot
0.22 0.22 0.22 0.22 0.22
com delight dot elus happenedokay
0.22 0.22 0.22 0.22 0.22
haydeereview lovey marc outlandish proclaim
0.22 0.22 0.22 0.22 0.22
rival undi mysteri date enough
0.22 0.22 0.21 0.21 0.21
ever abl cut step talk
0.21 0.21 0.21 0.21 0.21
repeat wind regular beat greedi
0.21 0.21 0.21 0.21 0.21
immens insensit somebodi complet usual
0.21 0.21 0.21 0.20 0.20
wrong thought intrigu yeah either
0.20 0.20 0.20 0.20 0.20
laugh move pretti creat jealous
0.20 0.20 0.20 0.20 0.20
worst explain earli near add
0.20 0.20 0.20 0.20 0.20
gross lose proper signific slight
0.20 0.20 0.20 0.20 0.20
lover rape top chanc three
0.20 0.20 0.19 0.19 0.19
bit yes person month begin
0.19 0.19 0.19 0.19 0.19
dude fight huge issu frame
0.19 0.19 0.19 0.19 0.19
continu kidnap fantasi dream glare
0.19 0.19 0.19 0.19 0.19
save typic watch drunk weak
0.19 0.19 0.19 0.19 0.19
role imagin ask cliff easier
0.19 0.19 0.19 0.19 0.19
treat challeng anoth read blind
0.19 0.19 0.18 0.18 0.18
understand give second long two
0.18 0.18 0.18 0.18 0.18
truli atleast away bestfriend bipolar
0.18 0.18 0.18 0.18 0.18
cake cheater dec doubl dumbest
0.18 0.18 0.18 0.18 0.18
forgot gangsta ghetto last onlin
0.18 0.18 0.18 0.18 0.18
ppl smh ugrat ten doubt
0.18 0.18 0.18 0.18 0.18
appeal hang sequenc distract dramat
0.18 0.18 0.18 0.18 0.18
suffer curious togeth awkward pull
0.18 0.18 0.18 0.18 0.18
younger learn agre led win
0.18 0.18 0.18 0.18 0.18
wear half given everyon toward
0.18 0.17 0.17 0.17 0.17
action look now keep terribl
0.17 0.17 0.17 0.17 0.17
big quit chick later situat
0.17 0.17 0.17 0.17 0.17
wtf sentenc alway school adult
0.17 0.17 0.17 0.17 0.17
human negat assum grab ensign
0.17 0.17 0.17 0.17 0.17
initi neither expert still start
0.17 0.17 0.17 0.16 0.16
rush shifter next life enjoy
0.16 0.16 0.16 0.16 0.16
confus come pleas femal high
0.16 0.16 0.16 0.16 0.16
beyond blah blame deal dumb
0.16 0.16 0.16 0.16 0.16
heat lucki mall nerv soon
0.16 0.16 0.16 0.16 0.16
hot amus plus impress stand
0.16 0.16 0.16 0.16 0.16
hey cool agon anticip gloss
0.16 0.16 0.16 0.16 0.16
atticus instantan lissa borderlin anybodi
0.16 0.16 0.16 0.16 0.16
develop leav mine appar front
0.15 0.15 0.15 0.15 0.15
therefor
0.15
# Low-rating corpus: terms whose correlation with the third most frequent
# stem is at least 0.15.
findAssocs(x = dtm2, terms = top10_words2[[3]], corlimit = 0.15)
$charact
main bigot aiden caveman
0.48 0.45 0.40 0.40
certian chestthump conniv crafti
0.40 0.40 0.40 0.40
creek daisi disservic fluff
0.40 0.40 0.40 0.40
hick hollow jessiema keepwomanbarefootpregn
0.40 0.40 0.40 0.40
korean lunk maniac moneygrub
0.40 0.40 0.40 0.40
mustach outrag railroad softer
0.40 0.40 0.40 0.40
stereoptyp storyanyway trio twodimension
0.40 0.40 0.40 0.40
unendear winc novel peter
0.40 0.40 0.39 0.38
suspect develop immedi incomplet
0.35 0.34 0.34 0.33
also cun fashion background
0.30 0.30 0.30 0.30
ignor appal almost make
0.30 0.30 0.29 0.28
final second actual busi
0.28 0.28 0.28 0.28
game reveal sound care
0.28 0.28 0.27 0.27
happili indian admir imag
0.27 0.27 0.27 0.27
predict suppos intellig author
0.26 0.26 0.26 0.24
side conclus amori indoor
0.24 0.24 0.24 0.24
kasi overheard programm prologu
0.24 0.24 0.24 0.24
scath smoke tomboy violat
0.24 0.24 0.24 0.24
virtu complex revolt like
0.24 0.24 0.24 0.23
bit intim nonexist time
0.23 0.23 0.23 0.22
first laugh moment choic
0.22 0.22 0.22 0.21
bed obvious clue connect
0.21 0.21 0.21 0.21
outsid note place stori
0.21 0.21 0.21 0.20
never reread even tell
0.20 0.20 0.20 0.20
men refer apolog grandmoth
0.20 0.20 0.20 0.20
plot experienc realli come
0.19 0.19 0.19 0.19
break father mean plan
0.19 0.19 0.19 0.19
secret shall dunnit exwif
0.19 0.19 0.19 0.19
scarc smart bridg duplic
0.19 0.19 0.19 0.19
map occas scrapbook stood
0.19 0.19 0.19 0.19
feel action ever leav
0.18 0.18 0.18 0.18
thing real becam kept
0.18 0.18 0.18 0.18
appeal explain safe still
0.18 0.18 0.18 0.17
complet get seem know
0.17 0.17 0.17 0.17
big drama abl shallow
0.17 0.17 0.17 0.17
human unrealist apart convers
0.17 0.17 0.17 0.17
half found unbeliev felt
0.16 0.16 0.16 0.16
set tri someth descript
0.16 0.16 0.16 0.16
say nativ part show
0.16 0.16 0.16 0.16
thrown trait role relev
0.16 0.16 0.16 0.16
hurt fascin photo shown
0.16 0.16 0.16 0.16
adopt aunt twain enough
0.16 0.16 0.16 0.15
problem lack two head
0.15 0.15 0.15 0.15
either introduc hope except
0.15 0.15 0.15 0.15
track
0.15
# Low-rating corpus: terms whose correlation with the fourth most frequent
# stem is at least 0.15.
findAssocs(x = dtm2, terms = top10_words2[[4]], corlimit = 0.15)
$get
guy hous see want actual
0.45 0.45 0.39 0.38 0.36
met jake even thing adjust
0.36 0.36 0.35 0.35 0.35
contradict cost deduc disregard dose
0.35 0.35 0.35 0.35 0.35
fate furious gape glenn goodguy
0.35 0.35 0.35 0.35 0.35
greet hanger incid injuri ley
0.35 0.35 0.35 0.35 0.35
leyland longtim lowerswel luc luca
0.35 0.35 0.35 0.35 0.35
mate mateleyland monogom newest pertin
0.35 0.35 0.35 0.35 0.35
pine recogn sake sensit shaft
0.35 0.35 0.35 0.35 0.35
sleepi slide triomega vol wolv
0.35 0.35 0.35 0.35 0.35
like right broke due ground
0.34 0.34 0.34 0.34 0.34
town univers way knew tri
0.34 0.34 0.33 0.33 0.33
break share claim never know
0.33 0.33 0.33 0.32 0.32
sever glare drunk summari ignor
0.31 0.31 0.31 0.31 0.31
immens volum wrong first everyth
0.31 0.31 0.30 0.30 0.30
let decid anoth figur love
0.30 0.30 0.29 0.29 0.29
yes suppos man reason issu
0.29 0.29 0.29 0.29 0.29
alon good say someon month
0.29 0.28 0.28 0.28 0.28
step without plus save near
0.28 0.28 0.28 0.28 0.28
room time date final involv
0.28 0.27 0.27 0.27 0.27
year bring happen got immedi
0.27 0.26 0.26 0.26 0.26
yeah sinc die explain count
0.26 0.26 0.26 0.26 0.26
allow despit expert much two
0.26 0.26 0.26 0.25 0.25
back introduc mean speci amus
0.25 0.25 0.25 0.25 0.25
sequenc watch conclus countri author
0.25 0.25 0.25 0.25 0.24
minut take enough realli wonder
0.24 0.24 0.24 0.24 0.24
sort exact week dream regular
0.24 0.24 0.24 0.24 0.24
suffici greedi cliff easier heard
0.24 0.24 0.24 0.24 0.24
insensit unbear victim propos think
0.24 0.24 0.24 0.24 0.23
give keep ahead neither lover
0.23 0.23 0.23 0.23 0.23
touch parent still chanc better
0.23 0.23 0.22 0.22 0.22
find life now rather abl
0.22 0.22 0.22 0.22 0.22
yea bedroom except curious local
0.22 0.22 0.22 0.22 0.22
awhil catch evil insult opposit
0.22 0.22 0.22 0.22 0.22
selfish strength book detail bad
0.22 0.22 0.21 0.21 0.21
other use need desir said
0.21 0.21 0.21 0.21 0.21
sign doubt wait togeth beat
0.21 0.21 0.21 0.21 0.21
therefor suggest led wrap doctor
0.21 0.21 0.21 0.21 0.21
make end shifter someth friend
0.20 0.20 0.20 0.20 0.20
long els tell around atleast
0.20 0.20 0.20 0.20 0.20
begin bestfriend bipolar cake cheater
0.20 0.20 0.20 0.20 0.20
chick dec doubl dude dumb
0.20 0.20 0.20 0.20 0.20
dumbest forgot gangsta ghetto ppl
0.20 0.20 0.20 0.20 0.20
real smh ugrat across exampl
0.20 0.20 0.20 0.20 0.20
pain meet rang suffer hand
0.20 0.20 0.20 0.20 0.20
dress respons adversari appearenceslyra assail
0.20 0.20 0.20 0.20 0.20
auntadopt backstori beau brandnick brunetteblond
0.20 0.20 0.20 0.20 0.20
chula ciana darkhairedblond degre diari
0.20 0.20 0.20 0.20 0.20
discoveri driven encourag examin farm
0.20 0.20 0.20 0.20 0.20
fingerviol graduat guid hunger inherit
0.20 0.20 0.20 0.20 0.20
intact killrap lazi lube lustmeanwhil
0.20 0.20 0.20 0.20 0.20
lyra magician nightspot onboard overreli
0.20 0.20 0.20 0.20 0.20
parapsycholog presum readingan santera sevill
0.20 0.20 0.20 0.20 0.20
spirit stepmoth stethoscop stone subsequ
0.20 0.20 0.20 0.20 0.20
swore unwant vicer vkarandal weight
0.20 0.20 0.20 0.20 0.20
worselyra wretch afterward argernon belt
0.20 0.20 0.20 0.20 0.20
berrr chastiti colli counterpart cyborg
0.20 0.20 0.20 0.20 0.20
deed dynam gamechang necessarili parad
0.20 0.20 0.20 0.20 0.20
pointyear ral replac rever somebodi
0.20 0.20 0.20 0.20 0.20
unbound warrior whatev zorn cow
0.20 0.20 0.20 0.20 0.20
geez handcuf handcuff headboard headway
0.20 0.20 0.20 0.20 0.20
involuntari loosen swallow teas thesaurus
0.20 0.20 0.20 0.20 0.20
toe aimless arizona arm asinin
0.20 0.20 0.20 0.20 0.20
assist bozo campaign cite communiti
0.20 0.20 0.20 0.20 0.20
convey destroy dismiss extermin funnier
0.20 0.20 0.20 0.20 0.20
genocid gimlet heartless incarn invad
0.20 0.20 0.20 0.20 0.20
kurd kurdish laughabl leader lure
0.20 0.20 0.20 0.20 0.20
mafia manhunt nauseat obliqu occupi
0.20 0.20 0.20 0.20 0.20
paint pistol pointwel rah redneck
0.20 0.20 0.20 0.20 0.20
rescuer retali senior setup sewag
0.20 0.20 0.20 0.20 0.20
sicken signal staff stymi superbl
0.20 0.20 0.20 0.20 0.20
superhuman suppli suppress terror terrorist
0.20 0.20 0.20 0.20 0.20
trap tune turk turkish unfold
0.20 0.20 0.20 0.20 0.20
unti winnow yike mani start
0.20 0.20 0.20 0.19 0.19
probabl thought toward editor woman
0.19 0.19 0.19 0.19 0.19
yet bunch lie nerv talk
0.19 0.19 0.19 0.19 0.19
school abil casey ship activ
0.19 0.19 0.19 0.19 0.19
circumst treat handsom address patron
0.19 0.19 0.19 0.19 0.19
tie shoot tactic fbi attent
0.19 0.19 0.19 0.19 0.18
read seem second femal fight
0.18 0.18 0.18 0.18 0.18
flirt onlin part tabitha women
0.18 0.18 0.18 0.18 0.18
show publish multipl men result
0.18 0.18 0.18 0.18 0.18
kill bar natur turkey charact
0.18 0.18 0.18 0.18 0.17
page may sure backbon pleas
0.17 0.17 0.17 0.17 0.17
serious blame clear deal lucki
0.17 0.17 0.17 0.17 0.17
mind live clunki elsewher spoiler
0.17 0.17 0.17 0.17 0.17
sleep add suspect closet shot
0.17 0.17 0.17 0.17 0.17
cult hurt dohner alli locat
0.17 0.17 0.17 0.17 0.17
bet prime alpha silent rush
0.17 0.17 0.17 0.17 0.16
scene work come space relationship
0.16 0.16 0.16 0.16 0.16
away cheat drop male commit
0.16 0.16 0.16 0.16 0.16
kidnap wish alway current storytel
0.16 0.16 0.16 0.16 0.16
fail listen virgin stalk info
0.16 0.16 0.16 0.16 0.16
agre smell motiv win seri
0.16 0.16 0.16 0.16 0.15
complet mayb also care mine
0.15 0.15 0.15 0.15 0.15
dead split point either freak
0.15 0.15 0.15 0.15 0.15
screw fulli almost least game
0.15 0.15 0.15 0.15 0.15
assum train ask reveal intellig
0.15 0.15 0.15 0.15 0.15
assassin
0.15
# Terms in dtm2 whose correlation with the 5th entry of top10_words2 is at
# least 0.15 (the result is printed under that term's name).
findAssocs(dtm2, terms = top10_words2[5], corlimit = 0.15)
$good
editor introduc guy armament board
0.41 0.38 0.37 0.37 0.37
british confin editori enfield mkiii
0.37 0.37 0.37 0.37 0.37
murdoch potent repli smle trend
0.37 0.37 0.37 0.37 0.37
vicker wipe yard way split
0.37 0.37 0.37 0.36 0.36
met rifl titan thing broke
0.36 0.35 0.35 0.34 0.33
round actual atleast bestfriend bipolar
0.33 0.32 0.32 0.32 0.32
cake cheater dec doubl dude
0.32 0.32 0.32 0.32 0.32
dumbest forgot gangsta ghetto ppl
0.32 0.32 0.32 0.32 0.32
smh ugrat hous jake adjust
0.32 0.32 0.32 0.32 0.32
cost deduc disregard dose fate
0.32 0.32 0.32 0.32 0.32
furious gape glenn goodguy greet
0.32 0.32 0.32 0.32 0.32
hanger incid injuri ley leyland
0.32 0.32 0.32 0.32 0.32
longtim lowerswel luc luca mateleyland
0.32 0.32 0.32 0.32 0.32
monogom newest pertin pine recogn
0.32 0.32 0.32 0.32 0.32
sake sensit shaft sleepi slide
0.32 0.32 0.32 0.32 0.32
triomega vol wolv wtf share
0.32 0.32 0.32 0.31 0.31
mate chick onlin contradict epic
0.31 0.30 0.30 0.30 0.29
claim get blame man ground
0.29 0.28 0.28 0.28 0.28
room volum time break month
0.28 0.28 0.27 0.27 0.27
dumb mean nerv reason sinc
0.27 0.27 0.27 0.27 0.27
week want wrong first everyth
0.27 0.26 0.26 0.26 0.26
got right flirt tabitha due
0.26 0.26 0.26 0.26 0.26
summari minut knew life wonder
0.26 0.25 0.25 0.25 0.25
cousin exampl glare disast drunk
0.25 0.25 0.25 0.25 0.25
credul key lee machin scienc
0.25 0.25 0.25 0.25 0.25
heard anoth like exact fight
0.25 0.24 0.24 0.24 0.24
weapon meet close jealous mistak
0.24 0.23 0.23 0.23 0.23
fiction calib ship catch even
0.23 0.23 0.23 0.23 0.22
tell mine person away cut
0.22 0.22 0.22 0.22 0.22
male year near alon note
0.22 0.22 0.22 0.22 0.22
town date other probabl happen
0.22 0.21 0.21 0.21 0.21
scene let eat ice lucki
0.21 0.21 0.21 0.21 0.21
mall stalker step talk typo
0.21 0.21 0.21 0.21 0.21
without pretti live fail fine
0.21 0.21 0.21 0.21 0.21
regular greedi neither cliff countri
0.21 0.21 0.21 0.21 0.21
easier ignor immens insensit unbear
0.21 0.21 0.21 0.21 0.21
much author figur next enough
0.20 0.20 0.20 0.20 0.20
find toward two someon drop
0.20 0.20 0.20 0.20 0.20
freak lie mind real situat
0.20 0.20 0.20 0.20 0.20
bedroom men awhil creatur stalk
0.20 0.20 0.20 0.20 0.20
insult univers love see sever
0.20 0.20 0.19 0.19 0.19
shifter sure need now know
0.19 0.19 0.19 0.19 0.19
blah complain huge part except
0.19 0.19 0.19 0.19 0.19
listen discuss technolog ahead suggest
0.19 0.19 0.19 0.19 0.19
despit realli rather begin idea
0.19 0.18 0.18 0.18 0.18
lover treat patron take problem
0.18 0.18 0.18 0.17 0.17
cheat clear dig earth settl
0.17 0.17 0.17 0.17 0.17
singl straight vision yea upon
0.17 0.17 0.17 0.17 0.17
amus fulli plus altern sequenc
0.17 0.17 0.17 0.17 0.17
current save result curious depict
0.17 0.17 0.17 0.17 0.17
tension circumst conclus opposit insist
0.17 0.17 0.17 0.17 0.17
propos given well end think
0.17 0.16 0.16 0.16 0.16
desir work admit confus easili
0.16 0.16 0.16 0.16 0.16
involv either grammat mad screw
0.16 0.16 0.16 0.16 0.16
speci publish doubt die distract
0.16 0.16 0.16 0.16 0.16
otherwis explain stretch learn therefor
0.16 0.16 0.16 0.16 0.16
locat tri also final advanc
0.16 0.15 0.15 0.15 0.15
sme stupidityauthor techniqu shoot tactic
0.15 0.15 0.15 0.15 0.15
booksauthor maggi aimless arizona arm
0.15 0.15 0.15 0.15 0.15
asinin assist bozo campaign cite
0.15 0.15 0.15 0.15 0.15
communiti convey destroy dismiss extermin
0.15 0.15 0.15 0.15 0.15
funnier genocid gimlet heartless incarn
0.15 0.15 0.15 0.15 0.15
invad kurd kurdish laughabl leader
0.15 0.15 0.15 0.15 0.15
lure mafia manhunt nauseat obliqu
0.15 0.15 0.15 0.15 0.15
occupi paint pistol pointwel rah
0.15 0.15 0.15 0.15 0.15
redneck rescuer retali senior setup
0.15 0.15 0.15 0.15 0.15
sewag sicken signal staff stymi
0.15 0.15 0.15 0.15 0.15
superbl superhuman suppli suppress terror
0.15 0.15 0.15 0.15 0.15
terrorist trap tune turk turkish
0.15 0.15 0.15 0.15 0.15
unfold unti winnow yike
0.15 0.15 0.15 0.15
# Terms in dtm2 whose correlation with the 6th entry of top10_words2 is at
# least 0.15 (the result is printed under that term's name).
findAssocs(dtm2, terms = top10_words2[6], corlimit = 0.15)
$time
mate hous jake adjust cost
0.47 0.46 0.45 0.45 0.45
deduc disregard dose furious gape
0.45 0.45 0.45 0.45 0.45
glenn goodguy greet hanger incid
0.45 0.45 0.45 0.45 0.45
injuri ley leyland longtim lowerswel
0.45 0.45 0.45 0.45 0.45
luc luca mateleyland monogom newest
0.45 0.45 0.45 0.45 0.45
pertin pine recogn sake sensit
0.45 0.45 0.45 0.45 0.45
shaft sleepi slide triomega vol
0.45 0.45 0.45 0.45 0.45
wolv fate met glare year
0.45 0.44 0.43 0.43 0.42
claim contradict tri volum month
0.42 0.42 0.40 0.40 0.39
introduc share ground summari heard
0.39 0.39 0.39 0.39 0.39
figur actual thing without way
0.38 0.38 0.37 0.36 0.35
anoth sever reason week due
0.34 0.34 0.34 0.34 0.34
room univers everyth meet guy
0.34 0.34 0.33 0.33 0.32
right alon also wonder amus
0.32 0.32 0.31 0.31 0.31
drunk regular greedi catch cliff
0.31 0.31 0.31 0.31 0.31
easier ignor immens insensit unbear
0.31 0.31 0.31 0.31 0.31
see abl plus near wast
0.30 0.30 0.30 0.30 0.29
date happen even yeah sort
0.29 0.29 0.29 0.29 0.29
broke sinc least save jealous
0.29 0.29 0.29 0.29 0.29
assum suggest despit love life
0.29 0.29 0.29 0.28 0.28
rather involv move curious opposit
0.28 0.28 0.28 0.28 0.28
good get first got dream
0.27 0.27 0.27 0.27 0.27
listen explain like knew drop
0.27 0.27 0.26 0.26 0.26
man togeth town hey current
0.26 0.26 0.26 0.26 0.25
think two now exact publish
0.24 0.24 0.24 0.24 0.24
bedroom die sequenc awhil circumst
0.24 0.24 0.24 0.24 0.24
conclus insult lover desir never
0.24 0.24 0.24 0.23 0.23
realli mean fulli doubt except
0.23 0.23 0.23 0.23 0.23
hand refer add therefor tire
0.23 0.23 0.23 0.23 0.23
neither led wrap adopt aunt
0.23 0.23 0.23 0.23 0.23
much charact understand instead finish
0.22 0.22 0.22 0.22 0.22
toward absolut want read take
0.22 0.22 0.21 0.21 0.21
find poor noth someon part
0.21 0.21 0.21 0.21 0.21
everi men make author still
0.21 0.21 0.20 0.20 0.20
top seri bring give ago
0.20 0.20 0.20 0.20 0.20
mine away wait excus ahead
0.20 0.20 0.20 0.20 0.20
natur compar other friend thought
0.20 0.20 0.19 0.19 0.19
name woman cousin heat realiz
0.19 0.19 0.19 0.19 0.19
ten exampl manag protagonist touch
0.19 0.19 0.19 0.19 0.19
nick shown chanc horribl shifter
0.19 0.19 0.18 0.18 0.18
sure enough wrong work yes
0.18 0.18 0.18 0.18 0.18
els serious either women upon
0.18 0.18 0.18 0.18 0.18
watch result secret allow behavior
0.18 0.18 0.18 0.18 0.18
decid selfish book long suppos
0.18 0.18 0.17 0.17 0.17
tell know chick put across
0.17 0.17 0.17 0.17 0.17
pain alway live product fact
0.17 0.17 0.17 0.17 0.17
countri hard attent plot review
0.17 0.16 0.16 0.16 0.16
seem need say let world
0.16 0.16 0.16 0.16 0.16
space relationship advic atleast bestfriend
0.16 0.16 0.16 0.16 0.16
bipolar cake cheater day dec
0.16 0.16 0.16 0.16 0.16
doubl dumbest forgot gangsta ghetto
0.16 0.16 0.16 0.16 0.16
later male mind ppl smh
0.16 0.16 0.16 0.16 0.16
ugrat case adul afterlif aliv
0.16 0.16 0.16 0.16 0.16
andrew ankl artwork blond bracer
0.16 0.16 0.16 0.16 0.16
brutal bug caitlin chaf chapterfemilla
0.16 0.16 0.16 0.16 0.16
condemn consum crazymak demeana duti
0.16 0.16 0.16 0.16 0.16
frazetta genus goddess grievious regent
0.16 0.16 0.16 0.16 0.16
rehabilit salvat shirt slip tortur
0.16 0.16 0.16 0.16 0.16
traveltim withstand womankind yummi meant
0.16 0.16 0.16 0.16 0.16
bridg duplic map occas scrapbook
0.16 0.16 0.16 0.16 0.16
stood charlott departur vulner gradi
0.16 0.16 0.16 0.16 0.16
mika minni omar purrfect were
0.16 0.16 0.16 0.16 0.16
whine adversari appearenceslyra assail auntadopt
0.16 0.16 0.16 0.16 0.16
backstori beau brandnick brunetteblond chula
0.16 0.16 0.16 0.16 0.16
ciana darkhairedblond degre diari discoveri
0.16 0.16 0.16 0.16 0.16
driven encourag examin farm fingerviol
0.16 0.16 0.16 0.16 0.16
guid handsom hunger inherit intact
0.16 0.16 0.16 0.16 0.16
killrap lazi lube lustmeanwhil lyra
0.16 0.16 0.16 0.16 0.16
magician nightspot onboard overreli parapsycholog
0.16 0.16 0.16 0.16 0.16
presum readingan santera sevill spirit
0.16 0.16 0.16 0.16 0.16
stepmoth stethoscop stone subsequ swore
0.16 0.16 0.16 0.16 0.16
unwant vicer vkarandal weight worselyra
0.16 0.16 0.16 0.16 0.16
wretch ars rush mysteri said
0.16 0.16 0.15 0.15 0.15
back editor femal lead check
0.15 0.15 0.15 0.15 0.15
blah clear dude though success
0.15 0.15 0.15 0.15 0.15
death learn respons
0.15 0.15 0.15
# Terms in dtm2 whose correlation with the 7th entry of top10_words2 is at
# least 0.15 (the result is printed under that term's name).
findAssocs(dtm2, terms = top10_words2[7], corlimit = 0.15)
$realli
cow geez handcuf handcuff headboard headway
0.49 0.49 0.49 0.49 0.49 0.49
involuntari loosen swallow teas thesaurus toe
0.49 0.49 0.49 0.49 0.49 0.49
closet hurt backbon drug apolog like
0.43 0.41 0.39 0.39 0.37 0.36
yeah safe catch strength inconsist step
0.36 0.36 0.36 0.36 0.36 0.34
hous shot lock butt save stand
0.34 0.34 0.34 0.34 0.30 0.30
scatter guy break first share jake
0.30 0.29 0.29 0.28 0.28 0.28
count summari agre mate figur seri
0.28 0.28 0.28 0.28 0.27 0.27
see final editor keep thing awhil
0.27 0.27 0.27 0.27 0.27 0.27
room fate minut sever tri happen
0.27 0.27 0.26 0.26 0.26 0.26
pleas met glare protect adjust cost
0.26 0.26 0.26 0.26 0.26 0.26
deduc disregard dose furious gape glenn
0.26 0.26 0.26 0.26 0.26 0.26
goodguy greet hanger incid injuri ley
0.26 0.26 0.26 0.26 0.26 0.26
leyland longtim lowerswel luc luca mateleyland
0.26 0.26 0.26 0.26 0.26 0.26
monogom newest pertin pine recogn sake
0.26 0.26 0.26 0.26 0.26 0.26
sensit shaft sleepi slide triomega vol
0.26 0.26 0.26 0.26 0.26 0.26
wolv ride want everyth sign ahead
0.26 0.26 0.25 0.25 0.25 0.25
contradict volum cave doctor think get
0.25 0.25 0.25 0.25 0.24 0.24
need second toward bed reason claim
0.24 0.24 0.24 0.24 0.24 0.24
author time knew take wonder now
0.23 0.23 0.23 0.23 0.23 0.23
care suppos sinc alon ground stick
0.23 0.23 0.23 0.23 0.23 0.23
much chanc love seem said let
0.22 0.22 0.22 0.22 0.22 0.22
redeem paper meet explain neither heard
0.22 0.22 0.22 0.22 0.22 0.22
expert gloss start someth someon immedi
0.22 0.22 0.21 0.21 0.21 0.21
man alway chic cross grope harass
0.21 0.21 0.21 0.21 0.21 0.21
mia nervous unev yep ariann door
0.21 0.21 0.21 0.21 0.21 0.21
exboss latch liam macho profess quinn
0.21 0.21 0.21 0.21 0.21 0.21
shi way better leav introduc women
0.21 0.20 0.20 0.20 0.20 0.20
pretti excus hand near correct note
0.20 0.20 0.20 0.20 0.20 0.20
charact felt lust never thought actual
0.19 0.19 0.19 0.19 0.19 0.19
grew men year incorrect boss divorc
0.19 0.19 0.19 0.19 0.19 0.19
make good lot find give say
0.18 0.18 0.18 0.18 0.18 0.18
know right show dislik live redempt
0.18 0.18 0.18 0.18 0.18 0.18
except readi drunk regular decid pathet
0.18 0.18 0.18 0.18 0.18 0.18
suggest greedi rent sneak cliff easier
0.18 0.18 0.18 0.18 0.18 0.18
ignor immens insensit unbear anoth ever
0.18 0.18 0.18 0.18 0.17 0.17
els scream tell serious involv without
0.17 0.17 0.17 0.17 0.17 0.17
due assum curious therefor lover opposit
0.17 0.17 0.17 0.17 0.17 0.17
univers uniqu book also enjoy back
0.17 0.17 0.16 0.16 0.16 0.16
month quit jealous gun complet read
0.16 0.16 0.16 0.16 0.15 0.15
finish noth work word even part
0.15 0.15 0.15 0.15 0.15 0.15
boy secret array entireti scrambl blogspot
0.15 0.15 0.15 0.15 0.15 0.15
com delight dot elus happenedokay haydeereview
0.15 0.15 0.15 0.15 0.15 0.15
lovey marc outlandish proclaim rival undi
0.15 0.15 0.15 0.15 0.15 0.15
booksim dana edward mummayb nfh pyte
0.15 0.15 0.15 0.15 0.15 0.15
sceneim aloud cobra conqueror contemporari edgeseat
0.15 0.15 0.15 0.15 0.15 0.15
icarus intric jaw outbound quadrail scriptoveral
0.15 0.15 0.15 0.15 0.15 0.15
snap stormtroop thrawn timothi trilog troop
0.15 0.15 0.15 0.15 0.15 0.15
videogam weakest whitearmor zahn guidanc revis
0.15 0.15 0.15 0.15 0.15 0.15
vast fizzel ton actor cadfael choos
0.15 0.15 0.15 0.15 0.15 0.15
commentsamazon elli miscast sheriff televis
0.15 0.15 0.15 0.15 0.15
# Terms in dtm2 whose correlation with the 8th entry of top10_words2 is at
# least 0.15 (the result is printed under that term's name).
findAssocs(dtm2, terms = top10_words2[8], corlimit = 0.15)
$author
tri actual think final
0.30 0.30 0.29 0.29
toward editor ignor ground
0.29 0.29 0.29 0.28
note first introduc hous
0.28 0.27 0.27 0.27
share stand make sever
0.27 0.27 0.26 0.26
never met due rifl
0.26 0.26 0.26 0.26
ship like take use
0.26 0.25 0.25 0.25
break jake heard unbear
0.25 0.25 0.25 0.25
wrap way charact get
0.25 0.24 0.24 0.24
probabl instead ever reason
0.24 0.24 0.24 0.24
publish research armament board
0.24 0.24 0.24 0.24
british confin editori enfield
0.24 0.24 0.24 0.24
mkiii murdoch potent repli
0.24 0.24 0.24 0.24
smle trend vicker wipe
0.24 0.24 0.24 0.24
yard adjust contradict cost
0.24 0.24 0.24 0.24
deduc disregard dose furious
0.24 0.24 0.24 0.24
gape glenn goodguy greet
0.24 0.24 0.24 0.24
hanger incid injuri ley
0.24 0.24 0.24 0.24
leyland longtim lowerswel luc
0.24 0.24 0.24 0.24
luca mateleyland monogom newest
0.24 0.24 0.24 0.24
pertin pine recogn sake
0.24 0.24 0.24 0.24
sensit shaft sleepi slide
0.24 0.24 0.24 0.24
triomega vol volum wolv
0.24 0.24 0.24 0.24
found stori guy also
0.23 0.23 0.23 0.23
realli die live sequenc
0.23 0.23 0.23 0.23
clunki claim curious suggest
0.23 0.23 0.23 0.23
conclus fate anoth two
0.23 0.23 0.22 0.22
even yeah broke without
0.22 0.22 0.22 0.22
count gun town despit
0.22 0.22 0.22 0.22
mate expert book enough
0.22 0.22 0.21 0.21
better give second wonder
0.21 0.21 0.21 0.21
back right thing near
0.21 0.21 0.21 0.21
summari lover figur time
0.21 0.21 0.20 0.20
good thought someon tell
0.20 0.20 0.20 0.20
immedi serious involv show
0.20 0.20 0.20 0.20
least except narrat trait
0.20 0.20 0.20 0.20
novel regular credul lee
0.20 0.20 0.20 0.20
titan main write need
0.20 0.19 0.19 0.19
desir liter work say
0.19 0.19 0.19 0.19
let suppos detect cut
0.19 0.19 0.19 0.19
issu fulli almost copi
0.19 0.19 0.19 0.19
gone watch fiction major
0.19 0.19 0.19 0.19
technolog happili ahead awhil
0.19 0.19 0.19 0.19
alon safe appear calib
0.19 0.19 0.19 0.19
led opposit univers peter
0.19 0.19 0.19 0.19
still attent problem said
0.18 0.18 0.18 0.18
descript know man anyth
0.18 0.18 0.18 0.18
idea closet love read
0.18 0.18 0.17 0.17
consid poor got long
0.17 0.17 0.17 0.17
sound absolut word world
0.17 0.17 0.17 0.17
head sinc plan similar
0.17 0.17 0.17 0.17
excus reader meet young
0.17 0.17 0.17 0.17
distract men save throughout
0.17 0.17 0.17 0.17
advis astonish cartooni composit
0.17 0.17 0.17 0.17
crunch duplex entranc golf
0.17 0.17 0.17 0.17
homeless incorrect nine outer
0.17 0.17 0.17 0.17
reedit retain selfawar semicolon
0.17 0.17 0.17 0.17
strunk thunder uneasi vampirelik
0.17 0.17 0.17 0.17
wrist wrongreal zombievis secret
0.17 0.17 0.17 0.17
hand behavior broad conceptu
0.17 0.17 0.17 0.17
oop savannah room harlequin
0.17 0.17 0.17 0.17
inaccur mishmash aiden caveman
0.17 0.17 0.17 0.17
certian chestthump conniv crafti
0.17 0.17 0.17 0.17
creek daisi disservic fluff
0.17 0.17 0.17 0.17
hick hollow jessiema keepwomanbarefootpregn
0.17 0.17 0.17 0.17
korean lunk maniac moneygrub
0.17 0.17 0.17 0.17
mustach outrag railroad softer
0.17 0.17 0.17 0.17
stereoptyp storyanyway trio twodimension
0.17 0.17 0.17 0.17
unendear winc cow geez
0.17 0.17 0.17 0.17
handcuf handcuff headboard headway
0.17 0.17 0.17 0.17
involuntari loosen swallow teas
0.17 0.17 0.17 0.17
thesaurus toe authent natchitoch
0.17 0.17 0.17 0.17
wikipedia gear indi konrath
0.17 0.17 0.17 0.17
myspac spectacular much want
0.17 0.17 0.16 0.16
complet potenti bit find
0.16 0.16 0.16 0.16
account affect happen roll
0.16 0.16 0.16 0.16
backbon around mean across
0.16 0.16 0.16 0.16
portion glare cancer elsewher
0.16 0.16 0.16 0.16
rang structur epic dramat
0.16 0.16 0.16 0.16
mistak drunk discuss result
0.16 0.16 0.16 0.16
explain assum fanci advantag
0.16 0.16 0.16 0.16
apolog key machin round
0.16 0.16 0.16 0.16
scienc comedi greedi catch
0.16 0.16 0.16 0.16
drug shot circumst cliff
0.16 0.16 0.16 0.16
easier immens insensit disrespect
0.16 0.16 0.16 0.16
girlfriend exposit hurt agon
0.16 0.16 0.16 0.16
hair parent bigot incomplet
0.16 0.16 0.16 0.16
strength eyebal describ bring
0.16 0.16 0.15 0.15
sort pass year differ
0.15 0.15 0.15 0.15
execut
0.15
# Terms in dtm2 whose correlation with the 9th entry of top10_words2 is at
# least 0.15 (the result is printed under that term's name).
findAssocs(dtm2, terms = top10_words2[9], corlimit = 0.15)
$short
stori enoughsorri playshirley unneed assur flipflop react
0.36 0.19 0.19 0.19 0.19 0.19 0.19
geneviev suffic welldevelop wellsilli closur invent unfinish
0.19 0.19 0.19 0.19 0.19 0.19 0.19
result higher
0.18 0.17
# Terms in dtm2 whose correlation with the 10th entry of top10_words2 is at
# least 0.15 (the result is printed under that term's name).
findAssocs(dtm2, terms = top10_words2[10], corlimit = 0.15)
$read
bookstor electron extinct gizmo inevit ironi nile
0.34 0.34 0.34 0.34 0.34 0.34 0.34
purveyor reawaken sermon devic discov sever tire
0.34 0.34 0.34 0.27 0.25 0.24 0.23
unbear final ground time especi hous correct
0.23 0.22 0.22 0.21 0.21 0.21 0.21
figur seri current year jake summari adjust
0.20 0.20 0.20 0.20 0.20 0.20 0.20
cost deduc disregard dose furious gape glenn
0.20 0.20 0.20 0.20 0.20 0.20 0.20
goodguy greet hanger incid injuri ley leyland
0.20 0.20 0.20 0.20 0.20 0.20 0.20
longtim lowerswel luc luca mate mateleyland monogom
0.20 0.20 0.20 0.20 0.20 0.20 0.20
newest pertin pine recogn sake sensit shaft
0.20 0.20 0.20 0.20 0.20 0.20 0.20
sleepi slide triomega vol wolv cow geez
0.20 0.20 0.20 0.20 0.20 0.20 0.20
handcuf handcuff headboard headway involuntari loosen swallow
0.20 0.20 0.20 0.20 0.20 0.20 0.20
teas thesaurus toe tri desir finish yeah
0.20 0.20 0.20 0.19 0.19 0.19 0.19
apolog fate volum like get someon suppos
0.19 0.19 0.19 0.18 0.18 0.18 0.18
met amus claim near room drug shot
0.18 0.18 0.18 0.18 0.18 0.18 0.18
contradict heard ignor hurt fascin clueless hop
0.18 0.18 0.18 0.18 0.18 0.18 0.18
author book ago hour rather stop even
0.17 0.17 0.17 0.17 0.17 0.17 0.17
thing meet closet yes keep introduc ten
0.17 0.17 0.17 0.16 0.16 0.16 0.16
share suggest chanc first bring give realli
0.16 0.16 0.15 0.15 0.15 0.15 0.15
backbon right excus
0.15 0.15 0.15
# Language model: annotate every review text in kindle_filtrado2 with the
# udpipe model held in udmodel_english, then flatten the annotation into a
# data frame (x2) that the part-of-speech sections below work from.
s2 <- udpipe_annotate(
  object = udmodel_english,
  x = kindle_filtrado2[["reviewText"]]
)
x2 <- data.frame(s2)
# Nouns: frequency table of the tokens tagged NOUN, followed by a bar chart
# of the first 20 rows of that table.
stats2 <- txt_freq(x2[["token"]][x2[["upos"]] %in% "NOUN"])
# Levels are set in reverse row order so the chart lists the table's first
# rows at the top.
stats2[["key"]] <- with(stats2, factor(key, levels = rev(key)))
head(stats2, 20)
barchart(
  key ~ freq,
  data = head(stats2, 20),
  main = "Most occurring Nouns (rating <= 2)",
  xlab = "Freq",
  col = "lightpink"
)
# Adjectives: frequency table of the tokens tagged ADJ, followed by a bar
# chart of the first 20 rows of that table.
stats2 <- txt_freq(x2[["token"]][x2[["upos"]] %in% "ADJ"])
# Levels are set in reverse row order so the chart lists the table's first
# rows at the top.
stats2[["key"]] <- with(stats2, factor(key, levels = rev(key)))
head(stats2, 20)
barchart(
  key ~ freq,
  data = head(stats2, 20),
  main = "Most common Adjectives (rating <= 2)",
  xlab = "Freq",
  col = "orchid"
)
# Verbs: frequency table of the tokens tagged VERB, followed by a bar chart
# of the first 20 rows of that table.
stats2 <- txt_freq(x2[["token"]][x2[["upos"]] %in% "VERB"])
# Levels are set in reverse row order so the chart lists the table's first
# rows at the top.
stats2[["key"]] <- with(stats2, factor(key, levels = rev(key)))
head(stats2, 20)
barchart(
  key ~ freq,
  data = head(stats2, 20),
  main = "Most occurring Verbs (rating <= 2)",
  xlab = "Freq",
  col = "hotpink"
)
# RAKE: keyword extraction over the lemmas of x2, grouped per document and
# restricted to words tagged NOUN or ADJ.
stats2 <- keywords_rake(
  x2,
  term = "lemma",
  group = "doc_id",
  relevant = x2[["upos"]] %in% c("NOUN", "ADJ")
)
# Levels are set in reverse row order so the chart lists the table's first
# rows at the top.
stats2[["key"]] <- with(stats2, factor(keyword, levels = rev(keyword)))
# Chart only keywords seen more than 3 times, keeping the first 20 of them.
barchart(
  key ~ rake,
  data = head(stats2[which(stats2[["freq"]] > 3), ], 20),
  main = "Keywords identified by RAKE (rating <= 2)",
  xlab = "Rake",
  col = "plum"
)
# Common phrases: recode the upos tags into phrasemachine tags, then match
# the tag sequence against a noun-phrase regular expression, reporting the
# lower-cased tokens as the phrase text.
x2[["phrase_tag"]] <- as_phrasemachine(x2[["upos"]], type = "upos")
stats2_phrases <- keywords_phrases(
  x = x2[["phrase_tag"]],
  term = tolower(x2[["token"]]),
  pattern = "(A|N)*N(P+D*(A|N)*N)*",
  is_regex = TRUE,
  detailed = FALSE
)
# Keep multi-word phrases (ngram > 1) that occur more than 3 times.
keep_rows <- with(stats2_phrases, which(ngram > 1 & freq > 3))
stats2_phrases <- stats2_phrases[keep_rows, ]
rm(keep_rows)
# Levels are set in reverse row order so the chart lists the table's first
# rows at the top.
stats2_phrases[["key"]] <- with(
  stats2_phrases,
  factor(keyword, levels = rev(keyword))
)
barchart(
  key ~ freq,
  data = head(stats2_phrases, 20),
  main = "Keywords - simple noun phrases (rating <= 2)",
  xlab = "Frequency",
  col = "mediumvioletred"
)
# Word clouds built from the word/frequency table d2.
# The seed is fixed once, before the first cloud, so the layout is repeatable.
set.seed(123)
# First cloud: up to 80 words with frequency >= 10, explicit size scale,
# 20% of words rotated, "Dark2" palette.
wordcloud(
  words = d2[["word"]],
  freq = d2[["freq"]],
  scale = c(4, 0.8),
  min.freq = 10,
  max.words = 80,
  random.order = FALSE,
  rot.per = 0.2,
  colors = brewer.pal(8, "Dark2")
)
# Second cloud: up to 100 words with frequency >= 8, default size scale,
# 35% of words rotated, "Set2" palette.
wordcloud(
  words = d2[["word"]],
  freq = d2[["freq"]],
  min.freq = 8,
  max.words = 100,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Set2")
)
# Comparison between analysis 1 and analysis 2: table of the most frequent
# words. Takes the first 10 rows of each word-frequency table (d and d2),
# labels each with its rating group, and stacks them into one table.
top_analisis1 <- mutate(head(d, 10), analisis = "Rating >= 4")
top_analisis2 <- mutate(head(d2, 10), analisis = "Rating <= 2")
comparativa_top <- bind_rows(top_analisis1, top_analisis2)
comparativa_top
# Comparative chart: horizontal, side-by-side bars of word frequency, filled
# by rating group, with words ordered by frequency along the axis.
ggplot(
  data = comparativa_top,
  mapping = aes(x = reorder(word, freq), y = freq, fill = analisis)
) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(
    title = "Comparación de palabras más frecuentes",
    x = "Palabra",
    y = "Frecuencia"
  )