# Text Clouding R Code: Autism in Oman

#0. Load the packages this script relies on
#
# tm supplies Corpus(), VectorSource(), tm_map(), content_transformer(),
# stopwords() and TermDocumentMatrix(); wordcloud and wordcloud2 draw the
# clouds; RColorBrewer supplies brewer.pal().
# NOTE(review): tm's stemDocument() additionally needs the SnowballC package
# to be installed (it does not have to be attached) -- confirm on this machine.

library(tm)
library(RColorBrewer)
library(wordcloud)
library(wordcloud2)

#1. Read the text file from your computer

# Keep the input location in one place so it is easy to change, and fail with
# a clear message when the file is missing.
txt_path <- "~/Documents/academics/squ/Research/Collaboration/Maryam/Westext.txt"
if (!file.exists(txt_path)) {
  stop("Input text file not found: ", txt_path, call. = FALSE)
}
# One element of txtdata per line of the file.
txtdata <- readLines(txt_path)

#2. Load the data as a corpus

# Each line read above becomes one document in the corpus.
doc <- Corpus(VectorSource(txtdata))

#3. Text transformation

# toSpc(x, pattern): replace every match of `pattern` in the document text
# with a single space.
toSpc <- content_transformer(function(x, pattern) {
  gsub(pattern = pattern, replacement = " ", x = x)
})

# Apply toSpc to the corpus once per separator, in order: "/", "@", then "|"
# (escaped because "|" is a regex metacharacter).
doc <- Reduce(
  function(corpus, pattern) tm_map(corpus, toSpc, pattern),
  c("/", "@", "\\|"),
  doc
)

#4. Some Cleaning
#
# The order of the steps matters:
#   * stop words are removed while apostrophes are still present, so that
#     contracted stop words (e.g. "don't") can still be matched;
#   * punctuation is removed BEFORE stemming, so a token such as "studies."
#     is stemmed exactly like "studies" instead of being left unstemmed and
#     counted as a separate term.

# Convert the text to lower case
doc <- tm_map(doc, content_transformer(tolower))
# Remove numbers
doc <- tm_map(doc, removeNumbers)
# Remove common English stop words
doc <- tm_map(doc, removeWords, stopwords("english"))
# Remove punctuation
doc <- tm_map(doc, removePunctuation)
# Reduce words to their common stem
doc <- tm_map(doc, stemDocument)

#5. Term document matrix

# Terms are rows and documents are columns.
dtm <- TermDocumentMatrix(doc)
# Dense copy of the term-document matrix so rowSums() can be applied.
m <- as.matrix(dtm)
# Total count of each term across all documents, most frequent first.
v <- sort(rowSums(m), decreasing = TRUE)
# Frequency table: one row per term, with the term also kept as a column.
d <- data.frame(word = names(v), freq = v)
# Show the ten most frequent terms.
head(d, n = 10)

# Sample output recorded from an earlier run:
##              word freq
## autism     autism    5
## factor     factor    5
## children children    4
## oman         oman    4
## child       child    4
## studi       studi    4
## statist   statist    3
## number     number    3
## status     status    3
## among       among    2

#6. Word Cloud

# Fix the random seed so the layout is reproducible between runs.
set.seed(1234)

# Static cloud: terms appearing at least twice, at most 30 terms, placed in
# decreasing order of frequency, a quarter of them rotated, "Dark2" palette.
wordcloud(
  words = d$word,
  freq = d$freq,
  min.freq = 2,
  max.words = 30,
  random.order = FALSE,
  rot.per = 0.25,
  colors = brewer.pal(8, "Dark2")
)

# wordcloud2 version built from the same frequency table.
wordcloud2(data = d, size = 1.4, color = "random-dark")

# Text Clouding R Code: Autism in Oman

# Ronald Wesonga (PhD)

# 15 July 2024