This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(readr)
library('tm')
library('RColorBrewer')
library('wordcloud')
# Directory that holds the saved tweet data sets. The input paths are built
# explicitly instead of calling setwd(): changing the working directory is a
# hidden global side effect, and when the document is knitted the working
# directory is restored after each chunk, so a setwd() call cannot be relied on.
data_dir <- path.expand("~/Google Drive/R")
# Earlier inputs (speech transcripts), kept for reference:
# trump <- read_delim("trumpScript.txt", "\t",escape_double = FALSE, col_names = FALSE, skip = 8)
# obama <- read_delim("ObamaSoU.rtf", "\t", escape_double = FALSE, col_names = FALSE, skip = 11)
# NOTE: the names `trump` and `obama` are left over from the inputs above.
# They now hold the "corona" and "covid" data sets respectively.
# NOTE(review): the structure of the stored objects is not visible here; the
# cleaning step below presumably expects character vectors of tweet text.
trump <- readRDS(file.path(data_dir, "corona0302.RDS"))
obama <- readRDS(file.path(data_dir, "covid0302.RDS"))
# Normalise raw tweet text for word-frequency analysis.
#
# Args:
#   x: Character vector (anything tolower() can coerce) of raw text.
#
# Returns:
#   A character vector the same length as `x`, lower-cased, with retweet
#   markers, @mentions, punctuation, digits and links removed, runs of
#   spaces/tabs collapsed to a single space, and surrounding whitespace trimmed.
clean.text <- function(x) {
  x <- tolower(x)
  # Remove the retweet marker only when it is a standalone word; a bare "rt"
  # pattern would also eat the letters inside words such as "party" or "start".
  x <- gsub("\\brt\\b", "", x)
  # Remove @mentions.
  x <- gsub("@\\w+", "", x)
  # Remove punctuation.
  x <- gsub("[[:punct:]]", "", x)
  # Remove digits.
  x <- gsub("[[:digit:]]", "", x)
  # Remove links. This runs after punctuation removal on purpose: by now a URL
  # such as "http://t.co/abc" has been squashed into the single token
  # "httptcoabc", which this pattern deletes as a whole.
  x <- gsub("http\\w+", "", x)
  # Collapse every run of spaces/tabs into ONE space. Replacing the run with
  # an empty string would glue the neighbouring words together.
  x <- gsub("[ \t]+", " ", x)
  # Strip all leading and trailing whitespace, not just a single space.
  trimws(x)
}
# Clean every tweet, then collapse each data set into one long string so that
# each data set becomes exactly one document (one column) in the matrix below.
# paste() already returns character, so no further coercion is needed.
trump <- paste(clean.text(trump), collapse = " ")
obama <- paste(clean.text(obama), collapse = " ")
# Two documents, in the order the column labels below rely on.
# (Named `docs` rather than `all` to avoid masking base::all().)
docs <- c(trump, obama)
# Create the corpus: one document per data set.
corpus <- Corpus(VectorSource(docs))
# Build the term-document matrix and convert it to a plain matrix, which is
# what the word-cloud functions take as input.
tdm <- as.matrix(TermDocumentMatrix(corpus))
# Label the columns; the order must match `docs` above.
colnames(tdm) <- c("Coronavirus", "Covid-19")
You can also embed plots, for example:
# Comparison cloud: words plotted per document (one colour per column of
# `tdm`), limited to the 200 highest-ranked words.
comparison.cloud(
  tdm,
  random.order = FALSE,
  colors = c("#00B2FF", "red"),
  title.size = 1.5,
  max.words = 200
)
# Commonality cloud: words shared by both documents.
# `title.size` is intentionally not passed here: commonality.cloud() draws no
# document titles and forwards unknown arguments on to wordcloud()/text(),
# where `title.size` only triggers "not a graphical parameter" warnings.
commonality.cloud(
  tdm,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.