- Here is some of the code used to create the word frequency plot on the previous slide.
# Build the document-term matrix (DTM) from the `it_dtm` iterator, using a
# vectorizer derived from `v`.
vectorizer <- vocab_vectorizer(v)
dtm <- create_dtm(it_dtm, vectorizer)
# Total count of each term across all documents (one entry per DTM column).
# Matrix::colSums() sums the DTM in place, so no dense copy via as.matrix() is
# needed; a dense copy can exhaust memory for a large corpus or vocabulary.
# NOTE(review): assumes create_dtm() returns a sparse Matrix object (its
# documented default) -- confirm if a non-default `type` is ever passed.
word_counts <- Matrix::colSums(dtm)
# Keep the 15 most frequent words, ordered from most to least frequent.
# desc() is namespace-qualified like the other dplyr verbs so the pipeline
# does not rely on dplyr being attached.
word_counts_df <- data.frame(
  word = names(word_counts),
  count = as.vector(word_counts)
) %>%
  dplyr::arrange(dplyr::desc(count)) %>%
  dplyr::slice_head(n = 15)

# Horizontal bar chart of the selected words. reorder() sorts the bars by
# count, and coord_flip() puts the words on the vertical axis so the labels
# stay readable. geom_col() draws bar heights straight from `count`.
ggplot(word_counts_df, aes(x = reorder(word, count), y = count)) +
  geom_col(fill = "palegreen") +
  coord_flip() +
  labs(
    title = "Top 15 Most Frequent Words",
    x = "Word",
    y = "Frequency"
  )