# Text analysis of poster program World Water Congress & Exhibition 2016, Brisbane.
# Peter Prevos
# @InvisibleH2O

# The code
# Attach the packages before any of their functions are used.
# Attaching tm also loads NLP, and attaching wordcloud also loads
# RColorBrewer, which supplies brewer.pal() for the palette used below.
library(tm)
library(wordcloud)

# Read the poster program (one vector element per line of the file) and wrap
# it in a tm corpus so the tm_map() transformations can be applied.
posters <- readLines("IWA2016Brisbane Posters")
posters <- Corpus(VectorSource(posters))

# Removing non analytical terms
# (variable name kept as in the original script so later code that refers to
# it keeps working)
remove <- c("university", "institute", "water", "wastewater", "sciences")

# Clean the text. The order of the steps matters:
#  1. Lower-case first, because the stop-word list and the custom terms above
#     are lower-case and removeWords() matches case-sensitively.
#  2. Drop numbers.
#  3. Drop stop words and the non analytical terms while apostrophes are still
#     present, so contractions in the stop-word list can still match.
#  4. Strip punctuation, so that e.g. "treatment," and "treatment" are counted
#     as the same term in the cloud.
#  5. Collapse whitespace last, because every removal step above leaves gaps
#     behind.
posters <- tm_map(posters, content_transformer(tolower))
posters <- tm_map(posters, removeNumbers)
posters <- tm_map(posters, removeWords, stopwords("english"))
posters <- tm_map(posters, removeWords, remove)
posters <- tm_map(posters, removePunctuation)
posters <- tm_map(posters, stripWhitespace)

# Draw cloud
# With the default settings, wordcloud() warned that "application", "removal",
# "academy", "drinking" and "queensland" could not be fit on page and left
# them out of the plot. A smaller maximum word size (scale) and placing the
# most frequent words first (random.order = FALSE) give the large words more
# room; if warnings persist, reduce scale[1] further or enlarge the device.
wordcloud(
  posters,
  scale = c(3, 0.5),
  max.words = 75,
  random.order = FALSE,
  colors = brewer.pal(6, "Dark2")
)