This is a message from your friends.
Enjoy your day!
## There are a total of 11 entries
# Hide code, warnings and messages in the rendered document.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
# Load the packages used by this document.
pacman::p_load(tidyverse, wordcloud, stringr, tidytext, tm, readxl, prettydoc)

## This data is stored in a Google Sheet linked to a form
df <- read_excel("Sandra.xlsx")

# Fail early with a clear message if the expected form column is absent,
# instead of silently continuing with NULL text.
message_col <- "Type a message to Sandra"
if (!message_col %in% names(df)) {
  stop("Column '", message_col, "' not found in Sandra.xlsx", call. = FALSE)
}
text <- df[[message_col]]
## pre-processing text:
# Normalise free-text messages before they are tokenised for the word cloud.
#
# Lower-cases the text, drops @mentions, punctuation and digits, removes a
# small set of filler words (only when they occur as whole words) and
# collapses whitespace.
#
# x: character vector of messages.
# Returns a character vector of the same length as x.
clean.text <- function(x) {
  # Words to drop. They are matched with word boundaries so that "rt" does not
  # eat the middle of "birthday" and "is" does not eat the end of "this".
  drop_words <- c("rt", "sandra", "know", "you", "are", "is")
  drop_pattern <- paste0("\\b(", paste(drop_words, collapse = "|"), ")\\b")

  x <- tolower(x)
  # Remove @mentions while the "@" is still present, i.e. before punctuation.
  x <- gsub("@\\w+", " ", x)
  x <- gsub("[[:punct:]]", "", x)
  x <- gsub("[[:digit:]]", "", x)
  x <- gsub(drop_pattern, " ", x, perl = TRUE)
  # Turn runs of spaces, tabs, newlines and other control characters into a
  # single space so neighbouring words are never glued together.
  x <- gsub("[[:space:][:cntrl:]]+", " ", x)
  # Trim last: the removals above can leave spaces at either end.
  trimws(x)
}
# Total number of entries read from the spreadsheet.
cat("There are a total of", nrow(df), "entries\n")

# Normalise the messages, then drop entries that are missing or blank after
# cleaning so they do not become empty documents in the corpus.
cleanText <- clean.text(text)
cleanText <- cleanText[!is.na(cleanText) & nzchar(trimws(cleanText))]

# Build the corpus and strip English stop words plus a few extra terms.
text_corpus <- Corpus(VectorSource(cleanText))
text_corpus <- tm_map(text_corpus, content_transformer(tolower))
text_corpus <- tm_map(text_corpus, removeWords, stopwords("english"))
text_corpus <- tm_map(text_corpus, removeWords, c("global", "globalwarming"))

# Per-term totals across all messages, most frequent first.
term_matrix <- as.matrix(TermDocumentMatrix(text_corpus))
term_freq <- sort(rowSums(term_matrix), decreasing = TRUE)
tdm <- data.frame(word = names(term_freq), freq = term_freq)

# Fixed seed so the layout and colours are reproducible between renders.
set.seed(123)
if (nrow(tdm) > 0) {
  # min.freq = 1: with only a handful of short messages no term can approach
  # the previous threshold of 50, so keep every term that survived cleaning.
  wordcloud(
    words = tdm$word, freq = tdm$freq, min.freq = 1, scale = c(2.2, 1),
    colors = brewer.pal(8, "Set2"), random.color = TRUE, random.order = FALSE
  )
} else {
  message("No words left to plot after cleaning.")
}