Happy Birthday Sandra

2022-06-30

This is a message from your friends.

Enjoy your day!

## There are a total of 11 entries

# Hide code, warnings and messages in the rendered document.
# Spelled-out FALSE is used because the F shorthand can be reassigned.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
pacman::p_load(tidyverse,wordcloud,stringr,tidytext,tm,readxl,prettydoc)

## This data is stored in a Google Sheet linked to a form

# Read the form responses from the local workbook
# (presumably an export of the Google Sheet -- TODO confirm).
df <- read_excel("Sandra.xlsx")

# Fail fast when the message column is absent (e.g. the form question was
# reworded); otherwise `text` would be NULL and the cloud silently empty.
message_col <- "Type a message to Sandra"
if (!message_col %in% names(df)) {
  stop("Column '", message_col, "' not found in Sandra.xlsx", call. = FALSE)
}
text <- df[[message_col]]


## pre-processing text:
# Normalise free-text messages before they are turned into a word cloud.
#
# Lower-cases the text, strips @mentions, punctuation and digits, removes a
# list of unwanted words, and squeezes all whitespace to single spaces.
#
# Args:
#   x:          character vector of messages.
#   drop_words: plain words (no regex metacharacters) to remove. They are
#               matched as whole words only, so dropping "rt" does not turn
#               "birthday" into "bihday" and dropping "is" leaves "this" and
#               "wish" intact.
#
# Returns:
#   A character vector the same length as `x`, with words separated by
#   single spaces and no leading or trailing whitespace.
clean.text <- function(x,
                       drop_words = c("rt", "sandra", "know",
                                      "you", "are", "is")) {
  x <- tolower(x)

  # Mentions go first: once punctuation is stripped the "@" marker is gone
  # and the handle would survive as an ordinary word.
  x <- gsub("@\\w+", " ", x)
  x <- gsub("[[:punct:]]", "", x)
  x <- gsub("[[:digit:]]", "", x)

  if (length(drop_words) > 0) {
    word_pattern <- paste0(
      "\\b(?:", paste(tolower(drop_words), collapse = "|"), ")\\b"
    )
    # Replace with a space (not "") so neighbouring words never fuse.
    x <- gsub(word_pattern, " ", x, perl = TRUE)
  }

  # Tabs, newlines and other control characters become word separators;
  # runs of them collapse to one space. Trimming happens last so that gaps
  # left by the removals above are cleaned up as well.
  x <- gsub("[[:space:][:cntrl:]]+", " ", x)
  trimws(x)
}


# Total number of entries
cat("There are a total of", nrow(df), "entries")

cleanText <- clean.text(text)

# Keep only messages that still have content after cleaning. Testing the
# content (rather than equality with a single space) also drops "" and NA.
cleanText <- cleanText[!is.na(cleanText) & nzchar(trimws(cleanText))]

# Build the corpus and remove common English stop words.
text_corpus <- Corpus(VectorSource(cleanText))
text_corpus <- tm_map(text_corpus, content_transformer(tolower))
text_corpus <- tm_map(text_corpus, removeWords, stopwords("english"))
# NOTE(review): "global"/"globalwarming" look like leftovers from another
# analysis -- confirm and drop this step if they are not needed here.
text_corpus <- tm_map(text_corpus, removeWords, c("global", "globalwarming"))

# Word frequencies across all messages, most frequent first.
tdm <- TermDocumentMatrix(text_corpus)
tdm <- as.matrix(tdm)
tdm <- sort(rowSums(tdm), decreasing = TRUE)
tdm <- data.frame(word = names(tdm), freq = tdm)

# Fixed seed so the layout and random colours are reproducible.
set.seed(123)

if (nrow(tdm) > 0) {
  # Plot from the frequency table computed above instead of letting
  # wordcloud() rebuild it from the corpus. min.freq is 1 because a handful
  # of short messages cannot reach a high per-word count; a large threshold
  # would leave the cloud empty.
  wordcloud(words = tdm$word, freq = tdm$freq, min.freq = 1,
            scale = c(2.2, 1), colors = brewer.pal(8, "Set2"),
            random.color = TRUE, random.order = FALSE)
} else {
  message("No words left to plot after cleaning.")
}