library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidytext)
library(janeaustenr)
library(ggplot2)
# Plot the ten highest tf-idf words in each Jane Austen novel, one facet per
# book.
austen_books() %>%
  unnest_tokens(word, text, token = "words") %>%
  count(book, word) %>%
  # Each book is treated as one document when weighting terms.
  bind_tf_idf(word, book, n) %>%
  group_by(book) %>%
  # Name the ranking column explicitly rather than letting top_n() pick the
  # last column (which printed "Selecting by tf_idf"). Ties are kept, as
  # top_n() did, so a facet can show more than ten words.
  slice_max(tf_idf, n = 10, with_ties = TRUE) %>%
  ungroup() %>%
  # Sort globally and freeze that order as factor levels, reversed so the
  # highest-scoring word is drawn at the top of each flipped facet.
  arrange(desc(tf_idf)) %>%
  mutate(word = factor(word, levels = rev(unique(word)))) %>%
  ggplot(aes(x = word, y = tf_idf, fill = book)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ book, ncol = 3, scales = "free") +
  labs(x = "", y = "tf-idf") +
  theme(panel.background = element_blank()) +
  coord_flip()


The plot above shows that the ten highest tf-idf words in each of Jane Austen’s novels are mostly names of characters and places.