library(tm); library(magrittr); library(tidytext)
library(dplyr); library(ggplot2)

The dataset is the "515K Hotel Reviews Data in Europe" dataset from Kaggle.

# Read the hotel review export; the negative-review text used below is in
# the Negative_Review column.
reviews <- read.csv("Hotel_Reviews.csv")

# Build a document-term matrix from the negative reviews. Punctuation,
# numbers and stop words are removed via the control list.
neg_dtm <- DocumentTermMatrix(
  VCorpus(VectorSource(reviews$Negative_Review)),
  control = list(
    removePunctuation = TRUE,
    removeNumbers = TRUE,
    stopwords = TRUE
  )
)

# Print the matrix summary (dimensions, sparsity) and a sample of entries.
inspect(neg_dtm)
## <<DocumentTermMatrix (documents: 515738, terms: 51547)>>
## Non-/sparse entries: 4228059/26580518627
## Sparsity           : 100%
## Maximal term length: 148
## Weighting          : term frequency (tf)
## Sample             :
##         Terms
## Docs     bed breakfast hotel negative nothing one room rooms small staff
##   11363    1         2     0        0       0   0    0     3     1     1
##   138219   0        10     2        0       1   1    0     0     0     3
##   204173   1         2     3        0       1   1    6     0     0     0
##   218929   0         0     4        0       0   3    3     0     3     0
##   254493   2         1     5        0       0   1    8     0     2     2
##   281278   2         0     1        0       0   7    9     3     0     6
##   305003   2         1     3        1       1   3    7     0     0     0
##   394960   0         0     0        0       0   0    6     3     0     0
##   395861   0         0     3        0       0   3   11     0     0     1
##   474301   0         5     5        0       0   4    6     0     1     1

Although we can see the words used in each guest's negative review, we want to know which subjects give rise to the negative reviews. The 20 most frequent words in the negative reviews are:

# The pipeline below needs `neg_tidy`: the document-term matrix reshaped by
# tidytext into a table with one row per (document, term) pair and its count.
neg_tidy <- tidy(neg_dtm)

# Sum each term's count over all negative reviews and keep the 20 most
# frequent terms, largest first. top_n() is called without `wt`, so it ranks
# by the last column (`sum`) and reports "Selecting by sum".
neg_tidy %>%
  group_by(term) %>%
  summarise(sum = sum(count)) %>%
  top_n(20) %>%
  arrange(desc(sum))
## Selecting by sum
## # A tibble: 20 x 2
##    term         sum
##    <chr>      <dbl>
##  1 room      176052
##  2 negative  129447
##  3 hotel      74709
##  4 breakfast  58478
##  5 small      49883
##  6 staff      39512
##  7 nothing    38769
##  8 rooms      34808
##  9 bed        29834
## 10 one        28096
## 11 bit        27546
## 12 bathroom   26585
## 13 didn       26463
## 14 night      24083
## 15 little     22536
## 16 like       22445
## 17 shower     21290
## 18 good       20821
## 19 get        19504
## 20 service    19323

We now have the most frequent words, but we also want to know which terms are associated with "room", "breakfast", "staff" and "service":

# Subjects taken from the top-20 list whose associated vocabulary we inspect.
neg_list <- c(
  "room",
  "breakfast",
  "staff",
  "service"
)

# For each subject, list the terms whose correlation with it in the
# document-term matrix is at least 0.1.
findAssocs(neg_dtm, terms = neg_list, corlimit = 0.1)
## $room
##        small       booked        hotel          one          bed 
##         0.27         0.24         0.23         0.23         0.21 
##       double        night         also      another        asked 
##         0.21         0.21         0.20         0.20         0.20 
##        first        given         told         even          got 
##         0.20         0.20         0.20         0.19         0.19 
##         next        floor          day          get    reception 
##         0.19         0.18         0.17         0.17         0.17 
##       window      arrived         back         didn         door 
##         0.17         0.16         0.16         0.16         0.16 
##         just         paid         said         size         time 
##         0.16         0.16         0.16         0.16         0.16 
##          two          air     bathroom      booking         like 
##         0.16         0.15         0.15         0.15         0.15 
##        moved          put         stay         view         came 
##         0.15         0.15         0.15         0.15         0.14 
##       change         gave      offered        ready       single 
##         0.14         0.14         0.14         0.14         0.14 
##     standard       stayed     superior         tiny         twin 
##         0.14         0.14         0.14         0.14         0.14 
##        check        clean         left         made         move 
##         0.13         0.13         0.13         0.13         0.13 
##         open       people       second       shower        sleep 
##         0.13         0.13         0.13         0.13         0.13 
##         wasn         went         work       around          ask 
##         0.13         0.13         0.13         0.12         0.12 
##    available       called          can      checked      cleaned 
##         0.12         0.12         0.12         0.12         0.12 
##       deluxe      however      morning        never    requested 
##         0.12         0.12         0.12         0.12         0.12 
##        rooms      someone        still      upgrade     upgraded 
##         0.12         0.12         0.12         0.12         0.12 
##         wall         will      arrival          com   complained 
##         0.12         0.12         0.11         0.11         0.11 
##       couldn         desk    different        dirty    executive 
##         0.11         0.11         0.11         0.11         0.11 
##        extra      finally        found          hot         make 
##         0.11         0.11         0.11         0.11         0.11 
##      manager         much       nights        noise        noisy 
##         0.11         0.11         0.11         0.11         0.11 
##       really          see      smaller        space        water 
##         0.11         0.11         0.11         0.11         0.11 
##         well      without     although     basement         call 
##         0.11         0.11         0.10         0.10         0.10 
##      changed       coffee         cold         come         felt 
##         0.10         0.10         0.10         0.10         0.10 
##         give         hear          key        light       looked 
##         0.10         0.10         0.10         0.10         0.10 
##          pay receptionist         star          tea       though 
##         0.10         0.10         0.10         0.10         0.10 
##       toilet      windows 
##         0.10         0.10 
## 
## $breakfast
##    included      buffet continental      cooked        eggs   expensive 
##        0.26        0.21        0.19        0.18        0.17        0.17 
##        food       price       bacon      coffee        poor       toast 
##        0.14        0.14        0.13        0.13        0.13        0.13 
##      choice         eat       table       bread     english       fruit 
##        0.12        0.12        0.12        0.11        0.11        0.11 
##        good       hotel       juice   selection      served      tables 
##        0.11        0.11        0.11        0.11        0.11        0.11 
##     variety   scrambled 
##        0.11        0.10 
## 
## $staff
##       rude     member   friendly    helpful  reception      hotel 
##       0.26       0.25       0.24       0.23       0.23       0.19 
##      asked      check        one       told  unhelpful       desk 
##       0.16       0.16       0.15       0.15       0.15       0.14 
##    members   attitude       even       help       said   customer 
##       0.14       0.13       0.13       0.13       0.13       0.12 
##      front       stay       time unfriendly       also    another 
##       0.12       0.12       0.12       0.12       0.11       0.11 
##        bar        day       didn     guests       just    manager 
##       0.11       0.11       0.11       0.11       0.11       0.11 
##      never        ask       back       came    english        get 
##       0.11       0.10       0.10       0.10       0.10       0.10 
##     polite      speak   training  welcoming 
##       0.10       0.10       0.10       0.10 
## 
## $service
##   customer       food      order     charge    ordered       poor 
##       0.20       0.15       0.14       0.13       0.13       0.13 
##       slow        bar      hotel       menu restaurant 
##       0.13       0.12       0.12       0.11       0.11
  1. Room
  1. Breakfast
  1. Staff
  1. Service