Load the packages & Hotel_Reviews dataset

library(dplyr); library(tidytext)
## Warning: package 'dplyr' was built under R version 3.6.2
library(stringr); library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.2
# Read the raw reviews from the CSV in the working directory; the first row
# is the header, and text columns stay character vectors rather than factors.
reviews <- read.csv(
  "Hotel_Reviews.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

Extract the negative reviews and make them into a tibble

# Drop reviews whose negative text is exactly "No Negative" (presumably the
# dataset's placeholder for "nothing negative to report" -- confirm against
# the data description).
reviews <- reviews %>%
  filter(Negative_Review != "No Negative")
neg_text <- reviews$Negative_Review
# Number the rows from the data itself instead of a hard-coded row count, so
# the tibble still builds when the CSV or the filter above changes.
neg_df <- tibble(line = seq_along(neg_text), text = neg_text)

Tidy the tibble into a word df, and remove stop words

# Load the stop_words dataset into the workspace.
data("stop_words")
# Tokenise the review text into one word per row, then keep only the rows
# that have no match in stop_words (joined on their common column).
tidy_neg <- anti_join(
  unnest_tokens(neg_df, word, text),
  stop_words
)
## Joining, by = "word"

Count the words in tidy_neg and sort them by frequency

# Word frequencies across the negative reviews, most common first.
count(tidy_neg, word, sort = TRUE)
## # A tibble: 54,981 x 2
##    word          n
##    <chr>     <int>
##  1 hotel     74709
##  2 breakfast 58478
##  3 staff     39512
##  4 bed       29828
##  5 bit       27546
##  6 bathroom  26585
##  7 didn      26463
##  8 night     24063
##  9 shower    21290
## 10 service   19323
## # ... with 54,971 more rows

Tidy the tibble into a bigrams df

# Split every negative review into consecutive two-word sequences (bigrams),
# one bigram per row.
neg_bigrams <- neg_df %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2)
# Bigram frequencies, most common first.
neg_bigrams %>%
  count(bigram, sort = TRUE)
## # A tibble: 983,358 x 2
##    bigram        n
##    <chr>     <int>
##  1 in the    68006
##  2 the room  58743
##  3 the hotel 35155
##  4 of the    33735
##  5 room was  31439
##  6 it was    30989
##  7 <NA>      30212
##  8 didn t    26449
##  9 on the    24877
## 10 was a     24504
## # ... with 983,348 more rows

Split each bigram into two word columns, and remove bigrams that contain a stop word

# Drop the missing bigrams (the <NA> rows in the bigram counts), then split
# each remaining bigram into its first and second word.
# is.na() is used because comparing against the string "NA" only removed
# those rows as a side effect of filter() discarding NA conditions.
bigrams_separated <- neg_bigrams %>%
  filter(!is.na(bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ")
# Keep only bigrams in which neither word is a stop word.
bigrams_filtered <- bigrams_separated %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  )

Create a function that lists the words most often appearing directly before a given word of interest (word2)

# Return the most frequent words that directly precede `word` in a table of
# bigrams.
#
# Args:
#   word:    Single string; the second word of the bigrams to look up.
#   bigrams: Data frame with `word1` and `word2` columns. Defaults to the
#            `bigrams_filtered` table built earlier in this script, so
#            existing one-argument calls behave as before.
#   top:     Number of top-ranked rows to keep. top_n() keeps ties, so more
#            than `top` rows can be returned.
#
# Returns:
#   A data frame with columns word1, word2 and n (the bigram count), sorted
#   by descending n.
word_interest <- function(word, bigrams = bigrams_filtered, top = 15) {
  bigrams %>%
    # `!!word` forces the function argument, so a column named `word` in
    # `bigrams` can never shadow it inside filter().
    filter(word2 == !!word) %>%
    count(word1, word2, sort = TRUE) %>%
    # No `wt` is given: top_n() ranks by the last column (`n`) and reports
    # "Selecting by n".
    top_n(top)
}

See the words that are associated with hotel

# Most frequent words directly preceding "hotel".
word_interest(word = "hotel")
## Selecting by n
## # A tibble: 15 x 3
##    word1     word2     n
##    <chr>     <chr> <int>
##  1 star      hotel  3962
##  2 stars     hotel   805
##  3 4         hotel   516
##  4 nice      hotel   328
##  5 5         hotel   263
##  6 worst     hotel   237
##  7 expensive hotel   215
##  8 boutique  hotel   196
##  9 lovely    hotel   177
## 10 london    hotel   176
## 11 sister    hotel   175
## 12 hilton    hotel   150
## 13 business  hotel   144
## 14 4star     hotel   142
## 15 luxury    hotel   123

See the words that are associated with breakfast

# Most frequent words directly preceding "breakfast".
word_interest(word = "breakfast")
## Selecting by n
## # A tibble: 15 x 3
##    word1         word2         n
##    <chr>         <chr>     <int>
##  1 cooked        breakfast   828
##  2 poor          breakfast   789
##  3 continental   breakfast   785
##  4 expensive     breakfast   785
##  5 english       breakfast   739
##  6 buffet        breakfast   454
##  7 free          breakfast   420
##  8 hot           breakfast   325
##  9 include       breakfast   283
## 10 hotel         breakfast   246
## 11 included      breakfast   220
## 12 service       breakfast   169
## 13 eat           breakfast   157
## 14 complimentary breakfast   150
## 15 limited       breakfast   125

See the words that are associated with staff

# Most frequent words directly preceding "staff".
word_interest(word = "staff")
## Selecting by n
## # A tibble: 15 x 3
##    word1        word2     n
##    <chr>        <chr> <int>
##  1 reception    staff  2072
##  2 hotel        staff  1012
##  3 bar          staff   873
##  4 cleaning     staff   676
##  5 desk         staff   645
##  6 breakfast    staff   505
##  7 restaurant   staff   380
##  8 rude         staff   340
##  9 friendly     staff   241
## 10 night        staff   227
## 11 unfriendly   staff   227
## 12 housekeeping staff   194
## 13 service      staff   169
## 14 helpful      staff   157
## 15 waiting      staff   134

See the words that are associated with bed

# Most frequent words directly preceding "bed".
word_interest(word = "bed")
## Selecting by n
## # A tibble: 15 x 3
##    word1         word2     n
##    <chr>         <chr> <int>
##  1 double        bed    2802
##  2 sofa          bed     845
##  3 single        bed     520
##  4 size          bed     517
##  5 extra         bed     472
##  6 uncomfortable bed     390
##  7 twin          bed     240
##  8 hard          bed     192
##  9 king          bed     180
## 10 queen         bed     172
## 11 comfortable   bed     133
## 12 sized         bed      91
## 13 bigger        bed      73
## 14 camp          bed      70
## 15 tiny          bed      70

See the words that are associated with bathroom

# Most frequent words directly preceding "bathroom".
word_interest(word = "bathroom")
## Selecting by n
## # A tibble: 15 x 3
##    word1    word2        n
##    <chr>    <chr>    <int>
##  1 tiny     bathroom   270
##  2 dirty    bathroom   111
##  3 glass    bathroom    83
##  4 poor     bathroom    53
##  5 cold     bathroom    50
##  6 entire   bathroom    48
##  7 bed      bathroom    45
##  8 clean    bathroom    43
##  9 tired    bathroom    43
## 10 dated    bathroom    41
## 11 hotel    bathroom    39
## 12 noisy    bathroom    38
## 13 separate bathroom    38
## 14 cramped  bathroom    37
## 15 smelly   bathroom    37

See the words that are associated with night

# Most frequent words directly preceding "night".
word_interest(word = "night")
## Selecting by n
## # A tibble: 15 x 3
##    word1         word2     n
##    <chr>         <chr> <int>
##  1 1             night   497
##  2 late          night   348
##  3 saturday      night   309
##  4 friday        night   151
##  5 2nd           night   137
##  6 3             night   131
##  7 2             night   107
##  8 1st           night   106
##  9 extra         night   102
## 10 sunday        night   101
## 11 4             night    78
## 12 uncomfortable night    60
## 13 sleepless     night    59
## 14 mid           night    44
## 15 hot           night    39

See the words that are associated with shower

# Most frequent words directly preceding "shower".
word_interest(word = "shower")
## Selecting by n
## # A tibble: 15 x 3
##    word1    word2      n
##    <chr>    <chr>  <int>
##  1 bathroom shower   254
##  2 bath     shower   226
##  3 cold     shower   125
##  4 poor     shower   111
##  5 broken   shower    96
##  6 tiny     shower    82
##  7 toilet   shower    78
##  8 separate shower    76
##  9 proper   shower    63
## 10 rain     shower    63
## 11 glass    shower    60
## 12 taking   shower    59
## 13 hot      shower    56
## 14 leaking  shower    47
## 15 held     shower    43

See the words that are associated with service

# Most frequent words directly preceding "service".
word_interest(word = "service")
## Selecting by n
## # A tibble: 15 x 3
##    word1      word2       n
##    <chr>      <chr>   <int>
##  1 customer   service  1098
##  2 poor       service   575
##  3 breakfast  service   423
##  4 slow       service   293
##  5 bar        service   280
##  6 bad        service   188
##  7 shuttle    service   178
##  8 cleaning   service   177
##  9 restaurant service   115
## 10 laundry    service   108
## 11 5          service   105
## 12 food       service    96
## 13 staff      service    96
## 14 concierge  service    90
## 15 internet   service    83

See the words that are associated with lounge

# Most frequent words directly preceding "lounge".
word_interest(word = "lounge")
## Selecting by n
## # A tibble: 16 x 3
##    word1       word2      n
##    <chr>       <chr>  <int>
##  1 executive   lounge   360
##  2 club        lounge   207
##  3 sky         lounge   125
##  4 bar         lounge    84
##  5 breakfast   lounge    34
##  6 exec        lounge    31
##  7 hotel       lounge    23
##  8 business    lounge    21
##  9 lobby       lounge    17
## 10 reception   lounge    16
## 11 floor       lounge    15
## 12 cocktail    lounge    12
## 13 public      lounge    11
## 14 comfortable lounge    10
## 15 guest       lounge    10
## 16 residents   lounge    10

See the words that are associated with spa

# Most frequent words directly preceding "spa".
word_interest(word = "spa")
## Selecting by n
## # A tibble: 19 x 3
##    word1      word2     n
##    <chr>      <chr> <int>
##  1 pool       spa      36
##  2 gym        spa      33
##  3 breakfast  spa      12
##  4 free       spa      12
##  5 hotel      spa      10
##  6 poor       spa       9
##  7 real       spa       8
##  8 relaxing   spa       7
##  9 facilities spa       6
## 10 limited    spa       6
## 11 sauna      spa       6
## 12 booked     spa       5
## 13 expensive  spa       5
## 14 advertised spa       4
## 15 bar        spa       4
## 16 foot       spa       4
## 17 health     spa       4
## 18 hotels     spa       4
## 19 noisy      spa       4

See the words that are associated with wifi

# Most frequent words directly preceding "wifi".
word_interest(word = "wifi")
## Selecting by n
## # A tibble: 15 x 3
##    word1         word2     n
##    <chr>         <chr> <int>
##  1 free          wifi   1608
##  2 poor          wifi    424
##  3 slow          wifi    203
##  4 bad           wifi    171
##  5 paid          wifi    110
##  6 weak          wifi     82
##  7 hotel         wifi     58
##  8 internet      wifi     57
##  9 expensive     wifi     56
## 10 terrible      wifi     50
## 11 complimentary wifi     32
## 12 premium       wifi     28
## 13 speed         wifi     28
## 14 unstable      wifi     27
## 15 day           wifi     24