salmon word cloud

This file can be used to import one or more .txt files from a directory and render them as a word cloud whose shape is taken from a .png image mask.

R Session Info

sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.3
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] wordcloud2_0.2.0   RColorBrewer_1.1-2 SnowballC_0.5.1   
## [4] tm_0.7-3           NLP_0.1-11        
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.16    digest_0.6.15   rprojroot_1.3-2 slam_0.1-42    
##  [5] backports_1.1.2 magrittr_1.5    evaluate_0.10.1 stringi_1.1.7  
##  [9] xml2_1.2.0      rmarkdown_1.9   tools_3.4.3     stringr_1.3.0  
## [13] htmlwidgets_1.0 yaml_2.1.18     parallel_3.4.3  compiler_3.4.3 
## [17] htmltools_0.3.6 knitr_1.20

1.) Create word frequency data frame

# NOTE: `rm(list = ls())` was removed from here. It deletes every object in the
# user's workspace but does not detach packages or reset options, so it never
# gives a truly clean session. Restart R if a fresh session is needed.

# load packages (install them first with install.packages() if they are missing)
library(NLP)
library(tm)
library(SnowballC)
library(RColorBrewer)
library(wordcloud2)

# create Corpus of docs
# Be careful to make sure the folder only has the docs you want included; in
# this case there is only one.
# Replace the "textmine" directory with your own; it is the folder where your
# .txt files are stored.
text_dir <- "/Users/bmeyer/Desktop/textmine"

# Fail early with a clear message if the folder path is wrong.
if (!dir.exists(text_dir)) {
  stop("Text directory not found: ", text_dir, call. = FALSE)
}

Docs <- Corpus(DirSource(text_dir))


# prep .txt doc corpus

# change to lower case (the stop word list is lower case, so this goes first)
Docs <- tm_map(Docs, content_transformer(tolower))
# remove stop words
# This has to run BEFORE punctuation is stripped: the SMART list contains
# contractions such as "don't" and "aren't", which no longer match once the
# apostrophes are gone and would otherwise survive as "dont" and "arent".
Docs <- tm_map(Docs, removeWords, stopwords("SMART"))
# remove punctuation
Docs <- tm_map(Docs, removePunctuation)
# remove numbers
Docs <- tm_map(Docs, removeNumbers)
# remove white space
# Docs <- tm_map(Docs, stripWhitespace)  # seems to cause errors with the letter "e", commented out 3/22/18
# stem document
# Docs <- tm_map(Docs, stemDocument)  # seems to cause errors with the letter "e", commented out 3/22/18


# convert word corpus to data frame
tdm <- TermDocumentMatrix(Docs)
m <- as.matrix(tdm)
# total count of each term across all documents, most frequent first
v <- sort(rowSums(m), decreasing = TRUE)
# stringsAsFactors = FALSE keeps `word` a character column on every R version
# (before R 4.0 the default silently converted it to a factor)
Docs2 <- data.frame(word = names(v), freq = v, stringsAsFactors = FALSE)

# view data frame
head(Docs2)

# get fish image mask and make word cloud shape

# change "figPath" to your location of the image "t.png"

# The colour vector alternates green/blue with one entry per word. It must be
# sized from Docs2 (the data being plotted), not from wordcloud2's bundled
# demoFreq example data, so that every word gets a colour.
salmon_wordcloud <- wordcloud2(
  Docs2,
  figPath = "~/Desktop/t.png",
  size = 0.4, # play with this "size" number to change relative proportions of words
  color = rep_len(c("green", "blue"), nrow(Docs2))
)

2.) Render word cloud image

# render image
# Evaluating the object on its own line at top level auto-prints it, which is
# what displays the word cloud stored in salmon_wordcloud.
salmon_wordcloud

# fish shape!!!!!!!!!

salmon word cloud

Benjamin Meyer (bemeyer@alaska.edu)

3/22/18