This script imports one or more .txt files from a folder, counts word frequencies, and renders them as a word cloud shaped by a .png mask image.
R Session Info
# Print the R version, platform, locale, and attached/loaded package versions
# so the environment this script was run in can be reproduced.
sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] wordcloud2_0.2.0 RColorBrewer_1.1-2 SnowballC_0.5.1
## [4] tm_0.7-3 NLP_0.1-11
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.16 digest_0.6.15 rprojroot_1.3-2 slam_0.1-42
## [5] backports_1.1.2 magrittr_1.5 evaluate_0.10.1 stringi_1.1.7
## [9] xml2_1.2.0 rmarkdown_1.9 tools_3.4.3 stringr_1.3.0
## [13] htmlwidgets_1.0 yaml_2.1.18 parallel_3.4.3 compiler_3.4.3
## [17] htmltools_0.3.6 knitr_1.20
1.) Create word frequency data frame
# NOTE: run this script in a fresh R session (restart R) instead of calling
# rm(list = ls()). That call deletes every object in the user's global
# environment, yet it does not detach packages or reset options, so it does not
# actually give a clean state. Every object used below is created by this
# script, so nothing needs to be cleared first.

# load packages (install them once beforehand with install.packages())
library(NLP)
library(tm)
library(SnowballC)
library(RColorBrewer)
library(wordcloud2)
# create Corpus of docs
# Every file in the folder is read, so make sure it only holds the documents
# you want included (in this case there is only one).
# Replace the "textmine" directory with your own: the folder where your .txt
# files are stored.
text_dir <- "/Users/bmeyer/Desktop/textmine"
# Fail early with a clear message instead of an opaque DirSource() error.
if (!dir.exists(text_dir)) {
  stop("Text directory not found: ", text_dir, call. = FALSE)
}
Docs <- Corpus(DirSource(text_dir))

# prep .txt doc corpus
# remove punctuation
Docs <- tm_map(Docs, removePunctuation)
# change to lower case
Docs <- tm_map(Docs, content_transformer(tolower))
# remove numbers
Docs <- tm_map(Docs, removeNumbers)
# remove white space
# Docs <- tm_map(Docs, stripWhitespace) # seems to cause errors with the letter "e", commented out 3/22/18
# stem document
# Docs <- tm_map(Docs, stemDocument) # seems to cause errors with the letter "e", commented out 3/22/18
# remove stop words
Docs <- tm_map(Docs, removeWords, stopwords("SMART"))

# convert word corpus to data frame:
# rows of the term-document matrix are terms, so the row sums are the total
# count of each term across all documents
tdm <- TermDocumentMatrix(Docs)
m <- as.matrix(tdm)
v <- sort(rowSums(m), decreasing = TRUE)
# stringsAsFactors = FALSE keeps `word` as character on R < 4.0 as well
Docs2 <- data.frame(word = names(v), freq = v, stringsAsFactors = FALSE)

# view data frame (most frequent words first)
head(Docs2)
# get fish image mask and make word cloud shape
# change "figPath" to your location of the image "t.png"
salmon_wordcloud <- wordcloud2(
  Docs2,
  figPath = "~/Desktop/t.png",
  size = 0.4, # play with this "size" number to change relative proportions of words
  # Alternate green/blue with exactly one colour per word in Docs2. The vector
  # was previously sized from wordcloud2's demo dataset (demoFreq), which is
  # unrelated to the data being plotted.
  color = rep_len(c("green", "blue"), nrow(Docs2))
)
2.) Render word cloud image
# render image: evaluating the widget object at top level auto-prints it,
# which displays the word cloud
salmon_wordcloud
# the words should fill the shape of the mask image -- fish shape!